{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 38928, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003082297338949964, "grad_norm": 0.5791122913360596, "learning_rate": 1.2325390304026295e-05, "loss": 11.5149, "step": 3 }, { "epoch": 0.0006164594677899928, "grad_norm": 1.7733268737792969, "learning_rate": 2.465078060805259e-05, "loss": 11.504, "step": 6 }, { "epoch": 0.0009246892016849893, "grad_norm": 3.047468423843384, "learning_rate": 3.697617091207888e-05, "loss": 11.4391, "step": 9 }, { "epoch": 0.0012329189355799856, "grad_norm": 3.346104621887207, "learning_rate": 4.930156121610518e-05, "loss": 11.2635, "step": 12 }, { "epoch": 0.001541148669474982, "grad_norm": 3.2847304344177246, "learning_rate": 6.162695152013147e-05, "loss": 10.9864, "step": 15 }, { "epoch": 0.0018493784033699785, "grad_norm": 3.2979729175567627, "learning_rate": 7.395234182415776e-05, "loss": 10.6487, "step": 18 }, { "epoch": 0.0021576081372649747, "grad_norm": 3.445396661758423, "learning_rate": 8.627773212818406e-05, "loss": 10.2667, "step": 21 }, { "epoch": 0.0024658378711599712, "grad_norm": 3.620011568069458, "learning_rate": 9.860312243221036e-05, "loss": 9.8679, "step": 24 }, { "epoch": 0.002774067605054968, "grad_norm": 3.66719913482666, "learning_rate": 0.00011092851273623665, "loss": 9.4532, "step": 27 }, { "epoch": 0.003082297338949964, "grad_norm": 3.709792375564575, "learning_rate": 0.00012325390304026294, "loss": 9.0393, "step": 30 }, { "epoch": 0.0033905270728449605, "grad_norm": 3.5757813453674316, "learning_rate": 0.00013557929334428925, "loss": 8.6449, "step": 33 }, { "epoch": 0.003698756806739957, "grad_norm": 3.200732707977295, "learning_rate": 0.00014790468364831553, "loss": 8.2932, "step": 36 }, { "epoch": 0.004006986540634953, "grad_norm": 2.7463502883911133, "learning_rate": 0.0001602300739523418, "loss": 8.0151, "step": 39 }, { "epoch": 0.004315216274529949, "grad_norm": 1.961776852607727, "learning_rate": 0.00017255546425636812, "loss": 7.7988, "step": 42 }, { "epoch": 0.004623446008424946, "grad_norm": 1.5326848030090332, "learning_rate": 0.0001848808545603944, "loss": 7.6688, "step": 45 }, { "epoch": 0.0049316757423199424, "grad_norm": 2.266148090362549, "learning_rate": 0.00019720624486442071, "loss": 7.6276, "step": 48 }, { "epoch": 0.005239905476214939, "grad_norm": 2.563206672668457, "learning_rate": 0.000209531635168447, "loss": 7.5954, "step": 51 }, { "epoch": 0.005548135210109936, "grad_norm": 1.9326294660568237, "learning_rate": 0.0002218570254724733, "loss": 7.5136, "step": 54 }, { "epoch": 0.005856364944004932, "grad_norm": 1.0131096839904785, "learning_rate": 0.0002341824157764996, "loss": 7.4167, "step": 57 }, { "epoch": 0.006164594677899928, "grad_norm": 1.1923354864120483, "learning_rate": 0.00024650780608052587, "loss": 7.3273, "step": 60 }, { "epoch": 0.006472824411794925, "grad_norm": 1.2690105438232422, "learning_rate": 0.00025883319638455215, "loss": 7.2615, "step": 63 }, { "epoch": 0.006781054145689921, "grad_norm": 0.8375588059425354, "learning_rate": 0.0002711585866885785, "loss": 7.2088, "step": 66 }, { "epoch": 0.007089283879584917, "grad_norm": 0.719011127948761, "learning_rate": 0.0002834839769926048, "loss": 7.1474, "step": 69 }, { "epoch": 0.007397513613479914, "grad_norm": 0.7095780372619629, "learning_rate": 0.00029580936729663106, "loss": 7.0923, "step": 72 }, { "epoch": 0.00770574334737491, "grad_norm": 0.5094121098518372, "learning_rate": 0.00030813475760065734, "loss": 7.0406, "step": 75 }, { "epoch": 0.008013973081269906, "grad_norm": 0.4085525572299957, "learning_rate": 0.0003204601479046836, "loss": 6.9586, "step": 78 }, { "epoch": 0.008322202815164903, "grad_norm": 0.4122146666049957, "learning_rate": 0.0003327855382087099, "loss": 6.9295, "step": 81 }, { "epoch": 0.008630432549059899, "grad_norm": 0.41669413447380066, "learning_rate": 0.00034511092851273624, "loss": 6.8629, "step": 84 }, { "epoch": 0.008938662282954896, "grad_norm": 0.40379494428634644, "learning_rate": 0.0003574363188167625, "loss": 6.7987, "step": 87 }, { "epoch": 0.009246892016849893, "grad_norm": 0.4202142059803009, "learning_rate": 0.0003697617091207888, "loss": 6.7779, "step": 90 }, { "epoch": 0.009555121750744888, "grad_norm": 0.3880140781402588, "learning_rate": 0.0003820870994248151, "loss": 6.7204, "step": 93 }, { "epoch": 0.009863351484639885, "grad_norm": 0.419883668422699, "learning_rate": 0.00039441248972884143, "loss": 6.6553, "step": 96 }, { "epoch": 0.010171581218534882, "grad_norm": 0.4570387601852417, "learning_rate": 0.0004067378800328677, "loss": 6.5936, "step": 99 }, { "epoch": 0.010479810952429877, "grad_norm": 0.5889524221420288, "learning_rate": 0.000419063270336894, "loss": 6.5271, "step": 102 }, { "epoch": 0.010788040686324874, "grad_norm": 0.5578189492225647, "learning_rate": 0.0004313886606409203, "loss": 6.4701, "step": 105 }, { "epoch": 0.011096270420219871, "grad_norm": 0.4710846245288849, "learning_rate": 0.0004437140509449466, "loss": 6.4254, "step": 108 }, { "epoch": 0.011404500154114866, "grad_norm": 0.30558162927627563, "learning_rate": 0.0004560394412489729, "loss": 6.4217, "step": 111 }, { "epoch": 0.011712729888009863, "grad_norm": 0.3634621500968933, "learning_rate": 0.0004683648315529992, "loss": 6.3422, "step": 114 }, { "epoch": 0.01202095962190486, "grad_norm": 0.369437575340271, "learning_rate": 0.00048069022185702546, "loss": 6.303, "step": 117 }, { "epoch": 0.012329189355799856, "grad_norm": 0.7418856024742126, "learning_rate": 0.0004930156121610517, "loss": 6.2535, "step": 120 }, { "epoch": 0.012637419089694853, "grad_norm": 0.855678915977478, "learning_rate": 0.000505341002465078, "loss": 6.2037, "step": 123 }, { "epoch": 0.01294564882358985, "grad_norm": 0.42414671182632446, "learning_rate": 0.0005176663927691043, "loss": 6.175, "step": 126 }, { "epoch": 0.013253878557484845, "grad_norm": 0.44572392106056213, "learning_rate": 0.0005299917830731307, "loss": 6.1548, "step": 129 }, { "epoch": 0.013562108291379842, "grad_norm": 0.24437515437602997, "learning_rate": 0.000542317173377157, "loss": 6.0967, "step": 132 }, { "epoch": 0.013870338025274839, "grad_norm": 0.4750615954399109, "learning_rate": 0.0005546425636811833, "loss": 6.0776, "step": 135 }, { "epoch": 0.014178567759169834, "grad_norm": 0.9504273533821106, "learning_rate": 0.0005669679539852095, "loss": 6.0471, "step": 138 }, { "epoch": 0.014486797493064831, "grad_norm": 0.5452646613121033, "learning_rate": 0.0005792933442892358, "loss": 5.9987, "step": 141 }, { "epoch": 0.014795027226959828, "grad_norm": 0.4314074218273163, "learning_rate": 0.0005916187345932621, "loss": 5.9874, "step": 144 }, { "epoch": 0.015103256960854823, "grad_norm": 0.48243793845176697, "learning_rate": 0.0006039441248972884, "loss": 5.962, "step": 147 }, { "epoch": 0.01541148669474982, "grad_norm": 0.7570855617523193, "learning_rate": 0.0006162695152013147, "loss": 5.921, "step": 150 }, { "epoch": 0.015719716428644816, "grad_norm": 0.4731276333332062, "learning_rate": 0.000628594905505341, "loss": 5.9032, "step": 153 }, { "epoch": 0.016027946162539813, "grad_norm": 0.3078465759754181, "learning_rate": 0.0006409202958093672, "loss": 5.8932, "step": 156 }, { "epoch": 0.01633617589643481, "grad_norm": 0.3441495895385742, "learning_rate": 0.0006532456861133935, "loss": 5.8695, "step": 159 }, { "epoch": 0.016644405630329807, "grad_norm": 0.43056854605674744, "learning_rate": 0.0006655710764174198, "loss": 5.8642, "step": 162 }, { "epoch": 0.016952635364224804, "grad_norm": 1.3783445358276367, "learning_rate": 0.0006778964667214461, "loss": 5.845, "step": 165 }, { "epoch": 0.017260865098119797, "grad_norm": 0.77984619140625, "learning_rate": 0.0006902218570254725, "loss": 5.8216, "step": 168 }, { "epoch": 0.017569094832014794, "grad_norm": 0.2990098297595978, "learning_rate": 0.0007025472473294988, "loss": 5.7801, "step": 171 }, { "epoch": 0.01787732456590979, "grad_norm": 0.29485219717025757, "learning_rate": 0.000714872637633525, "loss": 5.7757, "step": 174 }, { "epoch": 0.018185554299804788, "grad_norm": 0.4363936483860016, "learning_rate": 0.0007271980279375513, "loss": 5.7598, "step": 177 }, { "epoch": 0.018493784033699785, "grad_norm": 0.8902605175971985, "learning_rate": 0.0007395234182415776, "loss": 5.7719, "step": 180 }, { "epoch": 0.018802013767594782, "grad_norm": 0.7133249044418335, "learning_rate": 0.0007518488085456039, "loss": 5.7568, "step": 183 }, { "epoch": 0.019110243501489776, "grad_norm": 0.9105846881866455, "learning_rate": 0.0007641741988496302, "loss": 5.7191, "step": 186 }, { "epoch": 0.019418473235384773, "grad_norm": 0.9680726528167725, "learning_rate": 0.0007764995891536565, "loss": 5.7077, "step": 189 }, { "epoch": 0.01972670296927977, "grad_norm": 0.535446047782898, "learning_rate": 0.0007888249794576829, "loss": 5.6982, "step": 192 }, { "epoch": 0.020034932703174767, "grad_norm": 0.7894541621208191, "learning_rate": 0.0008011503697617091, "loss": 5.668, "step": 195 }, { "epoch": 0.020343162437069764, "grad_norm": 0.6975138187408447, "learning_rate": 0.0008134757600657354, "loss": 5.6432, "step": 198 }, { "epoch": 0.02065139217096476, "grad_norm": 0.6306262016296387, "learning_rate": 0.0008258011503697617, "loss": 5.6647, "step": 201 }, { "epoch": 0.020959621904859754, "grad_norm": 0.5615081787109375, "learning_rate": 0.000838126540673788, "loss": 5.6208, "step": 204 }, { "epoch": 0.02126785163875475, "grad_norm": 0.6468993425369263, "learning_rate": 0.0008504519309778143, "loss": 5.6272, "step": 207 }, { "epoch": 0.02157608137264975, "grad_norm": 0.8359414339065552, "learning_rate": 0.0008627773212818406, "loss": 5.6114, "step": 210 }, { "epoch": 0.021884311106544745, "grad_norm": 0.8909689784049988, "learning_rate": 0.0008751027115858668, "loss": 5.5957, "step": 213 }, { "epoch": 0.022192540840439742, "grad_norm": 0.39673465490341187, "learning_rate": 0.0008874281018898932, "loss": 5.568, "step": 216 }, { "epoch": 0.022500770574334736, "grad_norm": 0.9037743806838989, "learning_rate": 0.0008997534921939195, "loss": 5.5746, "step": 219 }, { "epoch": 0.022809000308229733, "grad_norm": 0.6929497122764587, "learning_rate": 0.0009120788824979458, "loss": 5.5482, "step": 222 }, { "epoch": 0.02311723004212473, "grad_norm": 0.5773665308952332, "learning_rate": 0.0009244042728019721, "loss": 5.5389, "step": 225 }, { "epoch": 0.023425459776019727, "grad_norm": 0.9532020092010498, "learning_rate": 0.0009367296631059984, "loss": 5.5369, "step": 228 }, { "epoch": 0.023733689509914724, "grad_norm": 1.2347012758255005, "learning_rate": 0.0009490550534100246, "loss": 5.5251, "step": 231 }, { "epoch": 0.02404191924380972, "grad_norm": 1.2062091827392578, "learning_rate": 0.0009613804437140509, "loss": 5.5042, "step": 234 }, { "epoch": 0.024350148977704714, "grad_norm": 1.4920969009399414, "learning_rate": 0.0009737058340180772, "loss": 5.4851, "step": 237 }, { "epoch": 0.02465837871159971, "grad_norm": 0.5619600415229797, "learning_rate": 0.0009860312243221035, "loss": 5.4889, "step": 240 }, { "epoch": 0.02496660844549471, "grad_norm": 0.8607615828514099, "learning_rate": 0.0009983566146261299, "loss": 5.4747, "step": 243 }, { "epoch": 0.025274838179389705, "grad_norm": 0.6228588223457336, "learning_rate": 0.001010682004930156, "loss": 5.4502, "step": 246 }, { "epoch": 0.025583067913284702, "grad_norm": 1.1925005912780762, "learning_rate": 0.0010230073952341824, "loss": 5.4449, "step": 249 }, { "epoch": 0.0258912976471797, "grad_norm": 0.7956414818763733, "learning_rate": 0.0010353327855382086, "loss": 5.4623, "step": 252 }, { "epoch": 0.026199527381074693, "grad_norm": 0.654242992401123, "learning_rate": 0.001047658175842235, "loss": 5.4287, "step": 255 }, { "epoch": 0.02650775711496969, "grad_norm": 0.592880368232727, "learning_rate": 0.0010599835661462614, "loss": 5.3891, "step": 258 }, { "epoch": 0.026815986848864687, "grad_norm": 0.9015865921974182, "learning_rate": 0.0010723089564502876, "loss": 5.4127, "step": 261 }, { "epoch": 0.027124216582759684, "grad_norm": 0.593488335609436, "learning_rate": 0.001084634346754314, "loss": 5.3887, "step": 264 }, { "epoch": 0.02743244631665468, "grad_norm": 0.7008156180381775, "learning_rate": 0.0010969597370583401, "loss": 5.386, "step": 267 }, { "epoch": 0.027740676050549678, "grad_norm": 0.32653194665908813, "learning_rate": 0.0011092851273623665, "loss": 5.3479, "step": 270 }, { "epoch": 0.02804890578444467, "grad_norm": 0.551142692565918, "learning_rate": 0.0011216105176663927, "loss": 5.3613, "step": 273 }, { "epoch": 0.02835713551833967, "grad_norm": 2.3521084785461426, "learning_rate": 0.001133935907970419, "loss": 5.3653, "step": 276 }, { "epoch": 0.028665365252234665, "grad_norm": 1.3452407121658325, "learning_rate": 0.0011462612982744455, "loss": 5.3559, "step": 279 }, { "epoch": 0.028973594986129662, "grad_norm": 1.0670260190963745, "learning_rate": 0.0011585866885784717, "loss": 5.3299, "step": 282 }, { "epoch": 0.02928182472002466, "grad_norm": 0.7768902778625488, "learning_rate": 0.001170912078882498, "loss": 5.3346, "step": 285 }, { "epoch": 0.029590054453919656, "grad_norm": 0.48641496896743774, "learning_rate": 0.0011832374691865242, "loss": 5.3178, "step": 288 }, { "epoch": 0.02989828418781465, "grad_norm": 0.5284126400947571, "learning_rate": 0.0011955628594905506, "loss": 5.3061, "step": 291 }, { "epoch": 0.030206513921709647, "grad_norm": 0.9099608659744263, "learning_rate": 0.0012078882497945768, "loss": 5.2764, "step": 294 }, { "epoch": 0.030514743655604644, "grad_norm": 0.7352691888809204, "learning_rate": 0.0012202136400986032, "loss": 5.2853, "step": 297 }, { "epoch": 0.03082297338949964, "grad_norm": 0.8361043334007263, "learning_rate": 0.0012325390304026294, "loss": 5.2838, "step": 300 }, { "epoch": 0.031131203123394638, "grad_norm": 1.525067925453186, "learning_rate": 0.0012448644207066558, "loss": 5.2612, "step": 303 }, { "epoch": 0.03143943285728963, "grad_norm": 0.6117688417434692, "learning_rate": 0.001257189811010682, "loss": 5.2488, "step": 306 }, { "epoch": 0.03174766259118463, "grad_norm": 0.9976358413696289, "learning_rate": 0.0012695152013147083, "loss": 5.2327, "step": 309 }, { "epoch": 0.032055892325079625, "grad_norm": 0.8152816891670227, "learning_rate": 0.0012818405916187345, "loss": 5.2095, "step": 312 }, { "epoch": 0.03236412205897462, "grad_norm": 0.8640046715736389, "learning_rate": 0.0012941659819227609, "loss": 5.1932, "step": 315 }, { "epoch": 0.03267235179286962, "grad_norm": 0.9461572170257568, "learning_rate": 0.001306491372226787, "loss": 5.1822, "step": 318 }, { "epoch": 0.03298058152676461, "grad_norm": 0.7717807292938232, "learning_rate": 0.0013188167625308134, "loss": 5.183, "step": 321 }, { "epoch": 0.03328881126065961, "grad_norm": 0.9057526588439941, "learning_rate": 0.0013311421528348396, "loss": 5.1686, "step": 324 }, { "epoch": 0.03359704099455461, "grad_norm": 0.5352618098258972, "learning_rate": 0.001343467543138866, "loss": 5.1378, "step": 327 }, { "epoch": 0.03390527072844961, "grad_norm": 1.2399810552597046, "learning_rate": 0.0013557929334428922, "loss": 5.1436, "step": 330 }, { "epoch": 0.0342135004623446, "grad_norm": 0.6678963303565979, "learning_rate": 0.0013681183237469186, "loss": 5.1488, "step": 333 }, { "epoch": 0.034521730196239594, "grad_norm": 0.6166791915893555, "learning_rate": 0.001380443714050945, "loss": 5.1239, "step": 336 }, { "epoch": 0.034829959930134595, "grad_norm": 1.1305850744247437, "learning_rate": 0.0013927691043549711, "loss": 5.1145, "step": 339 }, { "epoch": 0.03513818966402959, "grad_norm": 0.46510085463523865, "learning_rate": 0.0014050944946589975, "loss": 5.1041, "step": 342 }, { "epoch": 0.03544641939792459, "grad_norm": 0.4835362136363983, "learning_rate": 0.0014174198849630237, "loss": 5.0699, "step": 345 }, { "epoch": 0.03575464913181958, "grad_norm": 0.6595330238342285, "learning_rate": 0.00142974527526705, "loss": 5.0744, "step": 348 }, { "epoch": 0.036062878865714576, "grad_norm": 0.7306437492370605, "learning_rate": 0.0014420706655710763, "loss": 5.0703, "step": 351 }, { "epoch": 0.036371108599609576, "grad_norm": 0.5263068079948425, "learning_rate": 0.0014543960558751027, "loss": 5.0666, "step": 354 }, { "epoch": 0.03667933833350457, "grad_norm": 0.5896726250648499, "learning_rate": 0.001466721446179129, "loss": 5.0326, "step": 357 }, { "epoch": 0.03698756806739957, "grad_norm": 0.9357500672340393, "learning_rate": 0.0014790468364831552, "loss": 5.0363, "step": 360 }, { "epoch": 0.037295797801294564, "grad_norm": 0.7629897594451904, "learning_rate": 0.0014913722267871816, "loss": 5.0304, "step": 363 }, { "epoch": 0.037604027535189564, "grad_norm": 0.6347280144691467, "learning_rate": 0.0015036976170912078, "loss": 4.9962, "step": 366 }, { "epoch": 0.03791225726908456, "grad_norm": 0.4810947775840759, "learning_rate": 0.0015160230073952342, "loss": 4.9856, "step": 369 }, { "epoch": 0.03822048700297955, "grad_norm": 0.5907162427902222, "learning_rate": 0.0015283483976992604, "loss": 4.9712, "step": 372 }, { "epoch": 0.03852871673687455, "grad_norm": 0.5781192183494568, "learning_rate": 0.0015406737880032868, "loss": 5.0078, "step": 375 }, { "epoch": 0.038836946470769546, "grad_norm": 0.6017566323280334, "learning_rate": 0.001552999178307313, "loss": 4.9563, "step": 378 }, { "epoch": 0.039145176204664546, "grad_norm": 1.208348035812378, "learning_rate": 0.0015653245686113393, "loss": 4.9696, "step": 381 }, { "epoch": 0.03945340593855954, "grad_norm": 0.6113926768302917, "learning_rate": 0.0015776499589153657, "loss": 4.9461, "step": 384 }, { "epoch": 0.03976163567245453, "grad_norm": 0.6794010996818542, "learning_rate": 0.0015899753492193919, "loss": 4.9668, "step": 387 }, { "epoch": 0.040069865406349534, "grad_norm": 0.4383271038532257, "learning_rate": 0.0016023007395234183, "loss": 4.9283, "step": 390 }, { "epoch": 0.04037809514024453, "grad_norm": 0.9564613699913025, "learning_rate": 0.0016146261298274444, "loss": 4.8814, "step": 393 }, { "epoch": 0.04068632487413953, "grad_norm": 0.6730177402496338, "learning_rate": 0.0016269515201314708, "loss": 4.9158, "step": 396 }, { "epoch": 0.04099455460803452, "grad_norm": 0.5306158661842346, "learning_rate": 0.001639276910435497, "loss": 4.904, "step": 399 }, { "epoch": 0.04130278434192952, "grad_norm": 0.48708540201187134, "learning_rate": 0.0016516023007395234, "loss": 4.9002, "step": 402 }, { "epoch": 0.041611014075824515, "grad_norm": 0.4917944371700287, "learning_rate": 0.0016639276910435496, "loss": 4.8913, "step": 405 }, { "epoch": 0.04191924380971951, "grad_norm": 1.0929678678512573, "learning_rate": 0.001676253081347576, "loss": 4.8986, "step": 408 }, { "epoch": 0.04222747354361451, "grad_norm": 0.5417898297309875, "learning_rate": 0.0016885784716516024, "loss": 4.8702, "step": 411 }, { "epoch": 0.0425357032775095, "grad_norm": 1.1427472829818726, "learning_rate": 0.0017009038619556285, "loss": 4.8396, "step": 414 }, { "epoch": 0.0428439330114045, "grad_norm": 0.8225170969963074, "learning_rate": 0.001713229252259655, "loss": 4.8439, "step": 417 }, { "epoch": 0.0431521627452995, "grad_norm": 0.5638198256492615, "learning_rate": 0.001725554642563681, "loss": 4.8271, "step": 420 }, { "epoch": 0.04346039247919449, "grad_norm": 0.3389821946620941, "learning_rate": 0.0017378800328677075, "loss": 4.8207, "step": 423 }, { "epoch": 0.04376862221308949, "grad_norm": 0.38620057702064514, "learning_rate": 0.0017502054231717337, "loss": 4.8082, "step": 426 }, { "epoch": 0.044076851946984484, "grad_norm": 1.1568442583084106, "learning_rate": 0.00176253081347576, "loss": 4.7946, "step": 429 }, { "epoch": 0.044385081680879485, "grad_norm": 0.650175154209137, "learning_rate": 0.0017748562037797865, "loss": 4.7798, "step": 432 }, { "epoch": 0.04469331141477448, "grad_norm": 0.5364396572113037, "learning_rate": 0.0017871815940838126, "loss": 4.7732, "step": 435 }, { "epoch": 0.04500154114866947, "grad_norm": 0.7013806700706482, "learning_rate": 0.001799506984387839, "loss": 4.7733, "step": 438 }, { "epoch": 0.04530977088256447, "grad_norm": 0.4559784233570099, "learning_rate": 0.0018118323746918652, "loss": 4.7789, "step": 441 }, { "epoch": 0.045618000616459466, "grad_norm": 0.3456243872642517, "learning_rate": 0.0018241577649958916, "loss": 4.7456, "step": 444 }, { "epoch": 0.045926230350354466, "grad_norm": 0.6245532631874084, "learning_rate": 0.0018364831552999178, "loss": 4.7408, "step": 447 }, { "epoch": 0.04623446008424946, "grad_norm": 1.1933598518371582, "learning_rate": 0.0018488085456039441, "loss": 4.7728, "step": 450 }, { "epoch": 0.04654268981814446, "grad_norm": 0.8743248581886292, "learning_rate": 0.0018611339359079703, "loss": 4.7595, "step": 453 }, { "epoch": 0.046850919552039454, "grad_norm": 0.4980567693710327, "learning_rate": 0.0018734593262119967, "loss": 4.7222, "step": 456 }, { "epoch": 0.04715914928593445, "grad_norm": 0.6380690932273865, "learning_rate": 0.001885784716516023, "loss": 4.7175, "step": 459 }, { "epoch": 0.04746737901982945, "grad_norm": 0.3606894612312317, "learning_rate": 0.0018981101068200493, "loss": 4.7075, "step": 462 }, { "epoch": 0.04777560875372444, "grad_norm": 0.5618919730186462, "learning_rate": 0.0019104354971240757, "loss": 4.6939, "step": 465 }, { "epoch": 0.04808383848761944, "grad_norm": 0.639410138130188, "learning_rate": 0.0019227608874281018, "loss": 4.6748, "step": 468 }, { "epoch": 0.048392068221514435, "grad_norm": 0.7849680185317993, "learning_rate": 0.0019350862777321282, "loss": 4.6895, "step": 471 }, { "epoch": 0.04870029795540943, "grad_norm": 0.5419800877571106, "learning_rate": 0.0019474116680361544, "loss": 4.64, "step": 474 }, { "epoch": 0.04900852768930443, "grad_norm": 0.40359726548194885, "learning_rate": 0.001959737058340181, "loss": 4.6564, "step": 477 }, { "epoch": 0.04931675742319942, "grad_norm": 0.742076575756073, "learning_rate": 0.001972062448644207, "loss": 4.6434, "step": 480 }, { "epoch": 0.04962498715709442, "grad_norm": 0.620801568031311, "learning_rate": 0.0019843878389482336, "loss": 4.6509, "step": 483 }, { "epoch": 0.04993321689098942, "grad_norm": 0.5293563008308411, "learning_rate": 0.0019967132292522598, "loss": 4.6459, "step": 486 }, { "epoch": 0.05024144662488442, "grad_norm": 0.7527710795402527, "learning_rate": 0.002009038619556286, "loss": 4.6557, "step": 489 }, { "epoch": 0.05054967635877941, "grad_norm": 0.47365424036979675, "learning_rate": 0.002021364009860312, "loss": 4.6223, "step": 492 }, { "epoch": 0.050857906092674404, "grad_norm": 0.5232967734336853, "learning_rate": 0.0020336894001643387, "loss": 4.6186, "step": 495 }, { "epoch": 0.051166135826569405, "grad_norm": 0.40717506408691406, "learning_rate": 0.002046014790468365, "loss": 4.6125, "step": 498 }, { "epoch": 0.0514743655604644, "grad_norm": 0.5403701066970825, "learning_rate": 0.002058340180772391, "loss": 4.6143, "step": 501 }, { "epoch": 0.0517825952943594, "grad_norm": 0.7209203839302063, "learning_rate": 0.0020706655710764172, "loss": 4.5713, "step": 504 }, { "epoch": 0.05209082502825439, "grad_norm": 0.6991008520126343, "learning_rate": 0.002082990961380444, "loss": 4.6044, "step": 507 }, { "epoch": 0.052399054762149386, "grad_norm": 0.7478086352348328, "learning_rate": 0.00209531635168447, "loss": 4.5685, "step": 510 }, { "epoch": 0.052707284496044386, "grad_norm": 0.5864932537078857, "learning_rate": 0.002107641741988496, "loss": 4.588, "step": 513 }, { "epoch": 0.05301551422993938, "grad_norm": 0.44748950004577637, "learning_rate": 0.002119967132292523, "loss": 4.5823, "step": 516 }, { "epoch": 0.05332374396383438, "grad_norm": 0.32787564396858215, "learning_rate": 0.002132292522596549, "loss": 4.5522, "step": 519 }, { "epoch": 0.053631973697729374, "grad_norm": 0.30747687816619873, "learning_rate": 0.002144617912900575, "loss": 4.5429, "step": 522 }, { "epoch": 0.05394020343162437, "grad_norm": 0.3548784554004669, "learning_rate": 0.0021569433032046013, "loss": 4.5207, "step": 525 }, { "epoch": 0.05424843316551937, "grad_norm": 0.6617491841316223, "learning_rate": 0.002169268693508628, "loss": 4.5373, "step": 528 }, { "epoch": 0.05455666289941436, "grad_norm": 0.9917429089546204, "learning_rate": 0.002181594083812654, "loss": 4.5504, "step": 531 }, { "epoch": 0.05486489263330936, "grad_norm": 0.6506537795066833, "learning_rate": 0.0021939194741166803, "loss": 4.5385, "step": 534 }, { "epoch": 0.055173122367204355, "grad_norm": 0.3738003075122833, "learning_rate": 0.002206244864420707, "loss": 4.5169, "step": 537 }, { "epoch": 0.055481352101099356, "grad_norm": 0.3488200008869171, "learning_rate": 0.002218570254724733, "loss": 4.5119, "step": 540 }, { "epoch": 0.05578958183499435, "grad_norm": 0.31217944622039795, "learning_rate": 0.0022308956450287592, "loss": 4.4796, "step": 543 }, { "epoch": 0.05609781156888934, "grad_norm": 0.26770153641700745, "learning_rate": 0.0022432210353327854, "loss": 4.4699, "step": 546 }, { "epoch": 0.05640604130278434, "grad_norm": 0.3656662702560425, "learning_rate": 0.002255546425636812, "loss": 4.4817, "step": 549 }, { "epoch": 0.05671427103667934, "grad_norm": 0.5845988392829895, "learning_rate": 0.002267871815940838, "loss": 4.4525, "step": 552 }, { "epoch": 0.05702250077057434, "grad_norm": 0.41006627678871155, "learning_rate": 0.0022801972062448644, "loss": 4.4649, "step": 555 }, { "epoch": 0.05733073050446933, "grad_norm": 1.2013694047927856, "learning_rate": 0.002292522596548891, "loss": 4.4666, "step": 558 }, { "epoch": 0.057638960238364324, "grad_norm": 0.6116489171981812, "learning_rate": 0.002304847986852917, "loss": 4.4853, "step": 561 }, { "epoch": 0.057947189972259325, "grad_norm": 0.30115845799446106, "learning_rate": 0.0023171733771569433, "loss": 4.4409, "step": 564 }, { "epoch": 0.05825541970615432, "grad_norm": 0.2863396108150482, "learning_rate": 0.0023294987674609695, "loss": 4.4358, "step": 567 }, { "epoch": 0.05856364944004932, "grad_norm": 0.3191300928592682, "learning_rate": 0.002341824157764996, "loss": 4.454, "step": 570 }, { "epoch": 0.05887187917394431, "grad_norm": 0.4280944764614105, "learning_rate": 0.0023541495480690223, "loss": 4.3943, "step": 573 }, { "epoch": 0.05918010890783931, "grad_norm": 0.49310484528541565, "learning_rate": 0.0023664749383730485, "loss": 4.4097, "step": 576 }, { "epoch": 0.059488338641734306, "grad_norm": 0.4923991858959198, "learning_rate": 0.002378800328677075, "loss": 4.4454, "step": 579 }, { "epoch": 0.0597965683756293, "grad_norm": 0.5043625235557556, "learning_rate": 0.0023911257189811012, "loss": 4.3975, "step": 582 }, { "epoch": 0.0601047981095243, "grad_norm": 0.5404270887374878, "learning_rate": 0.0024034511092851274, "loss": 4.3957, "step": 585 }, { "epoch": 0.060413027843419294, "grad_norm": 0.9954332709312439, "learning_rate": 0.0024157764995891536, "loss": 4.3864, "step": 588 }, { "epoch": 0.060721257577314294, "grad_norm": 0.3632584512233734, "learning_rate": 0.00242810188989318, "loss": 4.38, "step": 591 }, { "epoch": 0.06102948731120929, "grad_norm": 0.2620343267917633, "learning_rate": 0.0024404272801972064, "loss": 4.3538, "step": 594 }, { "epoch": 0.06133771704510428, "grad_norm": 0.25050923228263855, "learning_rate": 0.0024527526705012325, "loss": 4.351, "step": 597 }, { "epoch": 0.06164594677899928, "grad_norm": 0.27279627323150635, "learning_rate": 0.0024650780608052587, "loss": 4.3335, "step": 600 }, { "epoch": 0.061954176512894275, "grad_norm": 0.6038771271705627, "learning_rate": 0.0024774034511092853, "loss": 4.3409, "step": 603 }, { "epoch": 0.062262406246789276, "grad_norm": 0.6948337554931641, "learning_rate": 0.0024897288414133115, "loss": 4.3555, "step": 606 }, { "epoch": 0.06257063598068427, "grad_norm": 0.5086238980293274, "learning_rate": 0.0025020542317173377, "loss": 4.3491, "step": 609 }, { "epoch": 0.06287886571457926, "grad_norm": 0.475999116897583, "learning_rate": 0.002514379622021364, "loss": 4.3412, "step": 612 }, { "epoch": 0.06318709544847426, "grad_norm": 0.3968357741832733, "learning_rate": 0.0025267050123253905, "loss": 4.3139, "step": 615 }, { "epoch": 0.06349532518236926, "grad_norm": 0.6681760549545288, "learning_rate": 0.0025390304026294166, "loss": 4.2999, "step": 618 }, { "epoch": 0.06380355491626426, "grad_norm": 0.3453294634819031, "learning_rate": 0.002551355792933443, "loss": 4.2873, "step": 621 }, { "epoch": 0.06411178465015925, "grad_norm": 0.3346744775772095, "learning_rate": 0.002563681183237469, "loss": 4.2868, "step": 624 }, { "epoch": 0.06442001438405424, "grad_norm": 0.39689645171165466, "learning_rate": 0.0025760065735414956, "loss": 4.2846, "step": 627 }, { "epoch": 0.06472824411794924, "grad_norm": 0.4017212688922882, "learning_rate": 0.0025883319638455218, "loss": 4.2625, "step": 630 }, { "epoch": 0.06503647385184425, "grad_norm": 0.3414025902748108, "learning_rate": 0.0026006573541495484, "loss": 4.2657, "step": 633 }, { "epoch": 0.06534470358573924, "grad_norm": 0.4091610312461853, "learning_rate": 0.002612982744453574, "loss": 4.2414, "step": 636 }, { "epoch": 0.06565293331963423, "grad_norm": 0.3916926085948944, "learning_rate": 0.0026253081347576007, "loss": 4.1801, "step": 639 }, { "epoch": 0.06596116305352923, "grad_norm": 1.0324465036392212, "learning_rate": 0.002637633525061627, "loss": 4.2162, "step": 642 }, { "epoch": 0.06626939278742423, "grad_norm": 0.4595172107219696, "learning_rate": 0.0026499589153656535, "loss": 4.2352, "step": 645 }, { "epoch": 0.06657762252131923, "grad_norm": 0.3215947151184082, "learning_rate": 0.0026622843056696792, "loss": 4.1865, "step": 648 }, { "epoch": 0.06688585225521422, "grad_norm": 0.2739149034023285, "learning_rate": 0.002674609695973706, "loss": 4.1644, "step": 651 }, { "epoch": 0.06719408198910921, "grad_norm": 0.250794917345047, "learning_rate": 0.002686935086277732, "loss": 4.1164, "step": 654 }, { "epoch": 0.06750231172300421, "grad_norm": 0.38465654850006104, "learning_rate": 0.0026992604765817586, "loss": 4.0844, "step": 657 }, { "epoch": 0.06781054145689921, "grad_norm": 0.5341691970825195, "learning_rate": 0.0027115858668857844, "loss": 4.1149, "step": 660 }, { "epoch": 0.06811877119079421, "grad_norm": 0.3479110896587372, "learning_rate": 0.002723911257189811, "loss": 4.1186, "step": 663 }, { "epoch": 0.0684270009246892, "grad_norm": 1.026038646697998, "learning_rate": 0.002736236647493837, "loss": 4.1293, "step": 666 }, { "epoch": 0.0687352306585842, "grad_norm": 0.445689857006073, "learning_rate": 0.0027485620377978638, "loss": 4.1319, "step": 669 }, { "epoch": 0.06904346039247919, "grad_norm": 0.3061058819293976, "learning_rate": 0.00276088742810189, "loss": 4.0338, "step": 672 }, { "epoch": 0.0693516901263742, "grad_norm": 0.26792746782302856, "learning_rate": 0.002773212818405916, "loss": 4.0154, "step": 675 }, { "epoch": 0.06965991986026919, "grad_norm": 0.2843894064426422, "learning_rate": 0.0027855382087099423, "loss": 4.0364, "step": 678 }, { "epoch": 0.06996814959416418, "grad_norm": 0.3073459565639496, "learning_rate": 0.002797863599013969, "loss": 3.9836, "step": 681 }, { "epoch": 0.07027637932805918, "grad_norm": 0.5893545746803284, "learning_rate": 0.002810188989317995, "loss": 4.0062, "step": 684 }, { "epoch": 0.07058460906195417, "grad_norm": 0.5386547446250916, "learning_rate": 0.0028225143796220217, "loss": 4.0066, "step": 687 }, { "epoch": 0.07089283879584918, "grad_norm": 0.7944250106811523, "learning_rate": 0.0028348397699260474, "loss": 3.9816, "step": 690 }, { "epoch": 0.07120106852974417, "grad_norm": 0.32200196385383606, "learning_rate": 0.002847165160230074, "loss": 3.9551, "step": 693 }, { "epoch": 0.07150929826363916, "grad_norm": 0.28814995288848877, "learning_rate": 0.0028594905505341, "loss": 3.912, "step": 696 }, { "epoch": 0.07181752799753416, "grad_norm": 0.2727998197078705, "learning_rate": 0.002871815940838127, "loss": 3.9203, "step": 699 }, { "epoch": 0.07212575773142915, "grad_norm": 0.2785607576370239, "learning_rate": 0.0028841413311421525, "loss": 3.8865, "step": 702 }, { "epoch": 0.07243398746532416, "grad_norm": 0.4318368136882782, "learning_rate": 0.002896466721446179, "loss": 3.8364, "step": 705 }, { "epoch": 0.07274221719921915, "grad_norm": 0.5888954997062683, "learning_rate": 0.0029087921117502053, "loss": 3.8772, "step": 708 }, { "epoch": 0.07305044693311415, "grad_norm": 0.5866847634315491, "learning_rate": 0.002921117502054232, "loss": 3.9039, "step": 711 }, { "epoch": 0.07335867666700914, "grad_norm": 0.40300968289375305, "learning_rate": 0.002933442892358258, "loss": 3.8332, "step": 714 }, { "epoch": 0.07366690640090415, "grad_norm": 0.2894107401371002, "learning_rate": 0.0029457682826622843, "loss": 3.8533, "step": 717 }, { "epoch": 0.07397513613479914, "grad_norm": 0.2637479901313782, "learning_rate": 0.0029580936729663105, "loss": 3.7962, "step": 720 }, { "epoch": 0.07428336586869413, "grad_norm": 0.5004228353500366, "learning_rate": 0.002970419063270337, "loss": 3.7759, "step": 723 }, { "epoch": 0.07459159560258913, "grad_norm": 0.30835986137390137, "learning_rate": 0.0029827444535743632, "loss": 3.7819, "step": 726 }, { "epoch": 0.07489982533648412, "grad_norm": 0.5601911544799805, "learning_rate": 0.00299506984387839, "loss": 3.7716, "step": 729 }, { "epoch": 0.07520805507037913, "grad_norm": 0.48242396116256714, "learning_rate": 0.0030073952341824156, "loss": 3.803, "step": 732 }, { "epoch": 0.07551628480427412, "grad_norm": 0.355916827917099, "learning_rate": 0.003019720624486442, "loss": 3.7532, "step": 735 }, { "epoch": 0.07582451453816912, "grad_norm": 0.4205069839954376, "learning_rate": 0.0030320460147904684, "loss": 3.7657, "step": 738 }, { "epoch": 0.07613274427206411, "grad_norm": 0.35680562257766724, "learning_rate": 0.003044371405094495, "loss": 3.7348, "step": 741 }, { "epoch": 0.0764409740059591, "grad_norm": 0.36372673511505127, "learning_rate": 0.0030566967953985207, "loss": 3.7569, "step": 744 }, { "epoch": 0.07674920373985411, "grad_norm": 0.2887914776802063, "learning_rate": 0.0030690221857025473, "loss": 3.7704, "step": 747 }, { "epoch": 0.0770574334737491, "grad_norm": 0.255290687084198, "learning_rate": 0.0030813475760065735, "loss": 3.7054, "step": 750 }, { "epoch": 0.0773656632076441, "grad_norm": 0.2969897389411926, "learning_rate": 0.0030936729663106, "loss": 3.708, "step": 753 }, { "epoch": 0.07767389294153909, "grad_norm": 0.491763710975647, "learning_rate": 0.003105998356614626, "loss": 3.7279, "step": 756 }, { "epoch": 0.07798212267543408, "grad_norm": 0.6437285542488098, "learning_rate": 0.0031183237469186525, "loss": 3.7129, "step": 759 }, { "epoch": 0.07829035240932909, "grad_norm": 0.3605806827545166, "learning_rate": 0.0031306491372226786, "loss": 3.6754, "step": 762 }, { "epoch": 0.07859858214322409, "grad_norm": 0.26162126660346985, "learning_rate": 0.0031429745275267052, "loss": 3.6869, "step": 765 }, { "epoch": 0.07890681187711908, "grad_norm": 0.3107220530509949, "learning_rate": 0.0031552999178307314, "loss": 3.6278, "step": 768 }, { "epoch": 0.07921504161101407, "grad_norm": 0.30417200922966003, "learning_rate": 0.0031676253081347576, "loss": 3.6046, "step": 771 }, { "epoch": 0.07952327134490907, "grad_norm": 0.5612326860427856, "learning_rate": 0.0031799506984387838, "loss": 3.6578, "step": 774 }, { "epoch": 0.07983150107880407, "grad_norm": 0.6136355996131897, "learning_rate": 0.0031922760887428104, "loss": 3.6897, "step": 777 }, { "epoch": 0.08013973081269907, "grad_norm": 0.4560060501098633, "learning_rate": 0.0032046014790468366, "loss": 3.6976, "step": 780 }, { "epoch": 0.08044796054659406, "grad_norm": 0.23871034383773804, "learning_rate": 0.003216926869350863, "loss": 3.657, "step": 783 }, { "epoch": 0.08075619028048905, "grad_norm": 0.17063000798225403, "learning_rate": 0.003229252259654889, "loss": 3.5905, "step": 786 }, { "epoch": 0.08106442001438405, "grad_norm": 0.35351842641830444, "learning_rate": 0.0032415776499589155, "loss": 3.603, "step": 789 }, { "epoch": 0.08137264974827906, "grad_norm": 0.340762197971344, "learning_rate": 0.0032539030402629417, "loss": 3.5978, "step": 792 }, { "epoch": 0.08168087948217405, "grad_norm": 0.22542034089565277, "learning_rate": 0.0032662284305669683, "loss": 3.5821, "step": 795 }, { "epoch": 0.08198910921606904, "grad_norm": 0.25130555033683777, "learning_rate": 0.003278553820870994, "loss": 3.5491, "step": 798 }, { "epoch": 0.08229733894996404, "grad_norm": 0.5155714750289917, "learning_rate": 0.0032908792111750206, "loss": 3.5605, "step": 801 }, { "epoch": 0.08260556868385904, "grad_norm": 0.3964254856109619, "learning_rate": 0.003303204601479047, "loss": 3.581, "step": 804 }, { "epoch": 0.08291379841775404, "grad_norm": 0.27110666036605835, "learning_rate": 0.0033155299917830734, "loss": 3.5995, "step": 807 }, { "epoch": 0.08322202815164903, "grad_norm": 0.38535767793655396, "learning_rate": 0.003327855382087099, "loss": 3.6029, "step": 810 }, { "epoch": 0.08353025788554402, "grad_norm": 0.6176712512969971, "learning_rate": 0.0033401807723911258, "loss": 3.5694, "step": 813 }, { "epoch": 0.08383848761943902, "grad_norm": 0.33828550577163696, "learning_rate": 0.003352506162695152, "loss": 3.5507, "step": 816 }, { "epoch": 0.08414671735333402, "grad_norm": 0.2286808043718338, "learning_rate": 0.0033648315529991786, "loss": 3.5345, "step": 819 }, { "epoch": 0.08445494708722902, "grad_norm": 0.30232542753219604, "learning_rate": 0.0033771569433032047, "loss": 3.5154, "step": 822 }, { "epoch": 0.08476317682112401, "grad_norm": 0.31767842173576355, "learning_rate": 0.0033894823336072313, "loss": 3.5442, "step": 825 }, { "epoch": 0.085071406555019, "grad_norm": 0.4275444746017456, "learning_rate": 0.003401807723911257, "loss": 3.5715, "step": 828 }, { "epoch": 0.085379636288914, "grad_norm": 0.3426364064216614, "learning_rate": 0.0034141331142152837, "loss": 3.5224, "step": 831 }, { "epoch": 0.085687866022809, "grad_norm": 0.33871403336524963, "learning_rate": 0.00342645850451931, "loss": 3.5119, "step": 834 }, { "epoch": 0.085996095756704, "grad_norm": 0.2641143202781677, "learning_rate": 0.0034387838948233365, "loss": 3.5179, "step": 837 }, { "epoch": 0.086304325490599, "grad_norm": 0.22955679893493652, "learning_rate": 0.003451109285127362, "loss": 3.4807, "step": 840 }, { "epoch": 0.08661255522449399, "grad_norm": 0.3795819878578186, "learning_rate": 0.003463434675431389, "loss": 3.4916, "step": 843 }, { "epoch": 0.08692078495838898, "grad_norm": 0.2942325174808502, "learning_rate": 0.003475760065735415, "loss": 3.4601, "step": 846 }, { "epoch": 0.08722901469228399, "grad_norm": 0.49732574820518494, "learning_rate": 0.0034880854560394416, "loss": 3.5021, "step": 849 }, { "epoch": 0.08753724442617898, "grad_norm": 0.4395911991596222, "learning_rate": 0.0035004108463434673, "loss": 3.4976, "step": 852 }, { "epoch": 0.08784547416007397, "grad_norm": 0.24201816320419312, "learning_rate": 0.003512736236647494, "loss": 3.4648, "step": 855 }, { "epoch": 0.08815370389396897, "grad_norm": 0.32818078994750977, "learning_rate": 0.00352506162695152, "loss": 3.4826, "step": 858 }, { "epoch": 0.08846193362786396, "grad_norm": 0.4433400630950928, "learning_rate": 0.0035373870172555467, "loss": 3.4763, "step": 861 }, { "epoch": 0.08877016336175897, "grad_norm": 0.2911035716533661, "learning_rate": 0.003549712407559573, "loss": 3.5024, "step": 864 }, { "epoch": 0.08907839309565396, "grad_norm": 0.27419009804725647, "learning_rate": 0.003562037797863599, "loss": 3.4382, "step": 867 }, { "epoch": 0.08938662282954896, "grad_norm": 0.2970244586467743, "learning_rate": 0.0035743631881676253, "loss": 3.4362, "step": 870 }, { "epoch": 0.08969485256344395, "grad_norm": 0.34221401810646057, "learning_rate": 0.003586688578471652, "loss": 3.4469, "step": 873 }, { "epoch": 0.09000308229733894, "grad_norm": 0.31807199120521545, "learning_rate": 0.003599013968775678, "loss": 3.3974, "step": 876 }, { "epoch": 0.09031131203123395, "grad_norm": 0.31519362330436707, "learning_rate": 0.0036113393590797046, "loss": 3.4275, "step": 879 }, { "epoch": 0.09061954176512894, "grad_norm": 0.5152423977851868, "learning_rate": 0.0036236647493837304, "loss": 3.4468, "step": 882 }, { "epoch": 0.09092777149902394, "grad_norm": 0.32447418570518494, "learning_rate": 0.003635990139687757, "loss": 3.4505, "step": 885 }, { "epoch": 0.09123600123291893, "grad_norm": 0.19884614646434784, "learning_rate": 0.003648315529991783, "loss": 3.4228, "step": 888 }, { "epoch": 0.09154423096681394, "grad_norm": 0.2726935148239136, "learning_rate": 0.0036606409202958098, "loss": 3.3957, "step": 891 }, { "epoch": 0.09185246070070893, "grad_norm": 0.29470425844192505, "learning_rate": 0.0036729663105998355, "loss": 3.3813, "step": 894 }, { "epoch": 0.09216069043460393, "grad_norm": 0.27806392312049866, "learning_rate": 0.003685291700903862, "loss": 3.3871, "step": 897 }, { "epoch": 0.09246892016849892, "grad_norm": 0.23773950338363647, "learning_rate": 0.0036976170912078883, "loss": 3.3941, "step": 900 }, { "epoch": 0.09277714990239391, "grad_norm": 0.45804303884506226, "learning_rate": 0.003709942481511915, "loss": 3.3752, "step": 903 }, { "epoch": 0.09308537963628892, "grad_norm": 0.45320865511894226, "learning_rate": 0.0037222678718159406, "loss": 3.4068, "step": 906 }, { "epoch": 0.09339360937018391, "grad_norm": 0.277089387178421, "learning_rate": 0.0037345932621199673, "loss": 3.4052, "step": 909 }, { "epoch": 0.09370183910407891, "grad_norm": 0.26548513770103455, "learning_rate": 0.0037469186524239934, "loss": 3.3753, "step": 912 }, { "epoch": 0.0940100688379739, "grad_norm": 0.24219335615634918, "learning_rate": 0.00375924404272802, "loss": 3.3913, "step": 915 }, { "epoch": 0.0943182985718689, "grad_norm": 0.2855617105960846, "learning_rate": 0.003771569433032046, "loss": 3.3636, "step": 918 }, { "epoch": 0.0946265283057639, "grad_norm": 0.35244864225387573, "learning_rate": 0.003783894823336073, "loss": 3.3603, "step": 921 }, { "epoch": 0.0949347580396589, "grad_norm": 0.3226896822452545, "learning_rate": 0.0037962202136400986, "loss": 3.3267, "step": 924 }, { "epoch": 0.09524298777355389, "grad_norm": 0.279863178730011, "learning_rate": 0.003808545603944125, "loss": 3.3192, "step": 927 }, { "epoch": 0.09555121750744888, "grad_norm": 0.35309404134750366, "learning_rate": 0.0038208709942481513, "loss": 3.2978, "step": 930 }, { "epoch": 0.09585944724134388, "grad_norm": 0.2359645515680313, "learning_rate": 0.003833196384552178, "loss": 3.3627, "step": 933 }, { "epoch": 0.09616767697523888, "grad_norm": 0.22583429515361786, "learning_rate": 0.0038455217748562037, "loss": 3.2669, "step": 936 }, { "epoch": 0.09647590670913388, "grad_norm": 0.2914174199104309, "learning_rate": 0.0038578471651602303, "loss": 3.3238, "step": 939 }, { "epoch": 0.09678413644302887, "grad_norm": 0.37748411297798157, "learning_rate": 0.0038701725554642565, "loss": 3.3232, "step": 942 }, { "epoch": 0.09709236617692386, "grad_norm": 0.28686878085136414, "learning_rate": 0.003882497945768283, "loss": 3.3143, "step": 945 }, { "epoch": 0.09740059591081886, "grad_norm": 0.22591544687747955, "learning_rate": 0.003894823336072309, "loss": 3.3285, "step": 948 }, { "epoch": 0.09770882564471386, "grad_norm": 0.24365665018558502, "learning_rate": 0.003907148726376336, "loss": 3.2799, "step": 951 }, { "epoch": 0.09801705537860886, "grad_norm": 0.3929263651371002, "learning_rate": 0.003919474116680362, "loss": 3.29, "step": 954 }, { "epoch": 0.09832528511250385, "grad_norm": 0.20268237590789795, "learning_rate": 0.003931799506984388, "loss": 3.2412, "step": 957 }, { "epoch": 0.09863351484639885, "grad_norm": 0.3333010673522949, "learning_rate": 0.003944124897288414, "loss": 3.2523, "step": 960 }, { "epoch": 0.09894174458029384, "grad_norm": 0.32760193943977356, "learning_rate": 0.0039564502875924406, "loss": 3.2958, "step": 963 }, { "epoch": 0.09924997431418885, "grad_norm": 0.27670565247535706, "learning_rate": 0.003968775677896467, "loss": 3.2683, "step": 966 }, { "epoch": 0.09955820404808384, "grad_norm": 0.32110410928726196, "learning_rate": 0.003981101068200493, "loss": 3.2576, "step": 969 }, { "epoch": 0.09986643378197883, "grad_norm": 0.43541696667671204, "learning_rate": 0.0039934264585045195, "loss": 3.2924, "step": 972 }, { "epoch": 0.10017466351587383, "grad_norm": 0.3483084738254547, "learning_rate": 0.004005751848808546, "loss": 3.2936, "step": 975 }, { "epoch": 0.10048289324976883, "grad_norm": 0.29586124420166016, "learning_rate": 0.004018077239112572, "loss": 3.2511, "step": 978 }, { "epoch": 0.10079112298366383, "grad_norm": 0.21434040367603302, "learning_rate": 0.0040304026294165985, "loss": 3.242, "step": 981 }, { "epoch": 0.10109935271755882, "grad_norm": 0.35204213857650757, "learning_rate": 0.004042728019720624, "loss": 3.2156, "step": 984 }, { "epoch": 0.10140758245145381, "grad_norm": 0.25223758816719055, "learning_rate": 0.004055053410024651, "loss": 3.257, "step": 987 }, { "epoch": 0.10171581218534881, "grad_norm": 0.2969653010368347, "learning_rate": 0.004067378800328677, "loss": 3.2576, "step": 990 }, { "epoch": 0.10202404191924382, "grad_norm": 0.26683250069618225, "learning_rate": 0.004079704190632704, "loss": 3.1998, "step": 993 }, { "epoch": 0.10233227165313881, "grad_norm": 0.26404044032096863, "learning_rate": 0.00409202958093673, "loss": 3.2303, "step": 996 }, { "epoch": 0.1026405013870338, "grad_norm": 0.2442736029624939, "learning_rate": 0.004104354971240756, "loss": 3.2428, "step": 999 }, { "epoch": 0.1029487311209288, "grad_norm": 0.2192964255809784, "learning_rate": 0.004116680361544782, "loss": 3.2661, "step": 1002 }, { "epoch": 0.10325696085482379, "grad_norm": 0.21057608723640442, "learning_rate": 0.004129005751848809, "loss": 3.1995, "step": 1005 }, { "epoch": 0.1035651905887188, "grad_norm": 0.3122745454311371, "learning_rate": 0.0041413311421528345, "loss": 3.2104, "step": 1008 }, { "epoch": 0.10387342032261379, "grad_norm": 0.643337607383728, "learning_rate": 0.004153656532456861, "loss": 3.2196, "step": 1011 }, { "epoch": 0.10418165005650878, "grad_norm": 0.265302449464798, "learning_rate": 0.004165981922760888, "loss": 3.2163, "step": 1014 }, { "epoch": 0.10448987979040378, "grad_norm": 0.27250421047210693, "learning_rate": 0.004178307313064914, "loss": 3.1781, "step": 1017 }, { "epoch": 0.10479810952429877, "grad_norm": 0.3951704800128937, "learning_rate": 0.00419063270336894, "loss": 3.2405, "step": 1020 }, { "epoch": 0.10510633925819378, "grad_norm": 0.20837850868701935, "learning_rate": 0.004202958093672967, "loss": 3.2269, "step": 1023 }, { "epoch": 0.10541456899208877, "grad_norm": 0.3887670338153839, "learning_rate": 0.004215283483976992, "loss": 3.219, "step": 1026 }, { "epoch": 0.10572279872598377, "grad_norm": 0.18901754915714264, "learning_rate": 0.004227608874281019, "loss": 3.1759, "step": 1029 }, { "epoch": 0.10603102845987876, "grad_norm": 0.3570176362991333, "learning_rate": 0.004239934264585046, "loss": 3.1544, "step": 1032 }, { "epoch": 0.10633925819377375, "grad_norm": 0.2346538007259369, "learning_rate": 0.004252259654889072, "loss": 3.1834, "step": 1035 }, { "epoch": 0.10664748792766876, "grad_norm": 0.1956055760383606, "learning_rate": 0.004264585045193098, "loss": 3.1597, "step": 1038 }, { "epoch": 0.10695571766156375, "grad_norm": 0.19475719332695007, "learning_rate": 0.0042769104354971246, "loss": 3.1818, "step": 1041 }, { "epoch": 0.10726394739545875, "grad_norm": 0.20991206169128418, "learning_rate": 0.00428923582580115, "loss": 3.148, "step": 1044 }, { "epoch": 0.10757217712935374, "grad_norm": 0.45754027366638184, "learning_rate": 0.004301561216105177, "loss": 3.1838, "step": 1047 }, { "epoch": 0.10788040686324873, "grad_norm": 0.2500004470348358, "learning_rate": 0.004313886606409203, "loss": 3.158, "step": 1050 }, { "epoch": 0.10818863659714374, "grad_norm": 0.29174116253852844, "learning_rate": 0.004326211996713229, "loss": 3.1619, "step": 1053 }, { "epoch": 0.10849686633103874, "grad_norm": 0.1642913520336151, "learning_rate": 0.004338537387017256, "loss": 3.1313, "step": 1056 }, { "epoch": 0.10880509606493373, "grad_norm": 0.20638629794120789, "learning_rate": 0.004350862777321282, "loss": 3.1553, "step": 1059 }, { "epoch": 0.10911332579882872, "grad_norm": 0.2534577548503876, "learning_rate": 0.004363188167625308, "loss": 3.146, "step": 1062 }, { "epoch": 0.10942155553272373, "grad_norm": 0.3894107937812805, "learning_rate": 0.004375513557929334, "loss": 3.1702, "step": 1065 }, { "epoch": 0.10972978526661872, "grad_norm": 0.18316411972045898, "learning_rate": 0.0043878389482333606, "loss": 3.1306, "step": 1068 }, { "epoch": 0.11003801500051372, "grad_norm": 0.22901946306228638, "learning_rate": 0.004400164338537387, "loss": 3.1012, "step": 1071 }, { "epoch": 0.11034624473440871, "grad_norm": 0.3013692796230316, "learning_rate": 0.004412489728841414, "loss": 3.1266, "step": 1074 }, { "epoch": 0.1106544744683037, "grad_norm": 0.26568275690078735, "learning_rate": 0.0044248151191454395, "loss": 3.1161, "step": 1077 }, { "epoch": 0.11096270420219871, "grad_norm": 0.23559318482875824, "learning_rate": 0.004437140509449466, "loss": 3.125, "step": 1080 }, { "epoch": 0.1112709339360937, "grad_norm": 0.29804936051368713, "learning_rate": 0.004449465899753492, "loss": 3.1212, "step": 1083 }, { "epoch": 0.1115791636699887, "grad_norm": 0.2965604066848755, "learning_rate": 0.0044617912900575185, "loss": 3.1435, "step": 1086 }, { "epoch": 0.11188739340388369, "grad_norm": 0.22977206110954285, "learning_rate": 0.004474116680361544, "loss": 3.1355, "step": 1089 }, { "epoch": 0.11219562313777869, "grad_norm": 0.2511363923549652, "learning_rate": 0.004486442070665571, "loss": 3.1041, "step": 1092 }, { "epoch": 0.11250385287167369, "grad_norm": 0.13533104956150055, "learning_rate": 0.004498767460969597, "loss": 3.1006, "step": 1095 }, { "epoch": 0.11281208260556869, "grad_norm": 0.1323193609714508, "learning_rate": 0.004511092851273624, "loss": 3.0623, "step": 1098 }, { "epoch": 0.11312031233946368, "grad_norm": 0.24355067312717438, "learning_rate": 0.00452341824157765, "loss": 3.109, "step": 1101 }, { "epoch": 0.11342854207335867, "grad_norm": 0.45989617705345154, "learning_rate": 0.004535743631881676, "loss": 3.1102, "step": 1104 }, { "epoch": 0.11373677180725367, "grad_norm": 0.27389761805534363, "learning_rate": 0.004548069022185702, "loss": 3.1058, "step": 1107 }, { "epoch": 0.11404500154114867, "grad_norm": 0.3120715320110321, "learning_rate": 0.004560394412489729, "loss": 3.0936, "step": 1110 }, { "epoch": 0.11435323127504367, "grad_norm": 0.3641244173049927, "learning_rate": 0.004572719802793755, "loss": 3.0895, "step": 1113 }, { "epoch": 0.11466146100893866, "grad_norm": 0.16439078748226166, "learning_rate": 0.004585045193097782, "loss": 3.0697, "step": 1116 }, { "epoch": 0.11496969074283366, "grad_norm": 0.21766935288906097, "learning_rate": 0.004597370583401808, "loss": 3.0952, "step": 1119 }, { "epoch": 0.11527792047672865, "grad_norm": 0.1682632714509964, "learning_rate": 0.004609695973705834, "loss": 3.0644, "step": 1122 }, { "epoch": 0.11558615021062366, "grad_norm": 0.18391060829162598, "learning_rate": 0.00462202136400986, "loss": 3.0565, "step": 1125 }, { "epoch": 0.11589437994451865, "grad_norm": 0.2503467798233032, "learning_rate": 0.004634346754313887, "loss": 3.0798, "step": 1128 }, { "epoch": 0.11620260967841364, "grad_norm": 0.3139159083366394, "learning_rate": 0.004646672144617912, "loss": 3.0784, "step": 1131 }, { "epoch": 0.11651083941230864, "grad_norm": 0.2205217182636261, "learning_rate": 0.004658997534921939, "loss": 3.0696, "step": 1134 }, { "epoch": 0.11681906914620364, "grad_norm": 0.322355180978775, "learning_rate": 0.004671322925225966, "loss": 3.0811, "step": 1137 }, { "epoch": 0.11712729888009864, "grad_norm": 0.27023863792419434, "learning_rate": 0.004683648315529992, "loss": 3.0955, "step": 1140 }, { "epoch": 0.11743552861399363, "grad_norm": 0.2672137916088104, "learning_rate": 0.004695973705834018, "loss": 3.0584, "step": 1143 }, { "epoch": 0.11774375834788862, "grad_norm": 0.271323561668396, "learning_rate": 0.0047082990961380446, "loss": 3.0483, "step": 1146 }, { "epoch": 0.11805198808178362, "grad_norm": 0.1428508758544922, "learning_rate": 0.00472062448644207, "loss": 3.0661, "step": 1149 }, { "epoch": 0.11836021781567863, "grad_norm": 0.29395970702171326, "learning_rate": 0.004732949876746097, "loss": 3.0391, "step": 1152 }, { "epoch": 0.11866844754957362, "grad_norm": 0.22083403170108795, "learning_rate": 0.0047452752670501235, "loss": 3.0579, "step": 1155 }, { "epoch": 0.11897667728346861, "grad_norm": 0.2015424370765686, "learning_rate": 0.00475760065735415, "loss": 3.0356, "step": 1158 }, { "epoch": 0.1192849070173636, "grad_norm": 0.21997034549713135, "learning_rate": 0.004769926047658176, "loss": 3.0301, "step": 1161 }, { "epoch": 0.1195931367512586, "grad_norm": 0.16206422448158264, "learning_rate": 0.0047822514379622025, "loss": 3.0407, "step": 1164 }, { "epoch": 0.11990136648515361, "grad_norm": 0.22591377794742584, "learning_rate": 0.004794576828266228, "loss": 3.0414, "step": 1167 }, { "epoch": 0.1202095962190486, "grad_norm": 0.2582632601261139, "learning_rate": 0.004806902218570255, "loss": 3.0148, "step": 1170 }, { "epoch": 0.1205178259529436, "grad_norm": 0.273416131734848, "learning_rate": 0.004819227608874281, "loss": 3.0023, "step": 1173 }, { "epoch": 0.12082605568683859, "grad_norm": 0.16373753547668457, "learning_rate": 0.004831552999178307, "loss": 3.0127, "step": 1176 }, { "epoch": 0.12113428542073358, "grad_norm": 0.2623594105243683, "learning_rate": 0.004843878389482334, "loss": 3.0635, "step": 1179 }, { "epoch": 0.12144251515462859, "grad_norm": 0.34809616208076477, "learning_rate": 0.00485620377978636, "loss": 3.0222, "step": 1182 }, { "epoch": 0.12175074488852358, "grad_norm": 0.23841938376426697, "learning_rate": 0.004868529170090386, "loss": 3.019, "step": 1185 }, { "epoch": 0.12205897462241858, "grad_norm": 0.2161986231803894, "learning_rate": 0.004880854560394413, "loss": 2.9934, "step": 1188 }, { "epoch": 0.12236720435631357, "grad_norm": 0.2870507836341858, "learning_rate": 0.0048931799506984385, "loss": 3.0438, "step": 1191 }, { "epoch": 0.12267543409020856, "grad_norm": 0.20796675980091095, "learning_rate": 0.004905505341002465, "loss": 2.9947, "step": 1194 }, { "epoch": 0.12298366382410357, "grad_norm": 0.1762983798980713, "learning_rate": 0.004917830731306492, "loss": 2.9729, "step": 1197 }, { "epoch": 0.12329189355799856, "grad_norm": 0.1240881159901619, "learning_rate": 0.0049301561216105174, "loss": 3.0149, "step": 1200 }, { "epoch": 0.12360012329189356, "grad_norm": 0.16968263685703278, "learning_rate": 0.004942481511914544, "loss": 2.9944, "step": 1203 }, { "epoch": 0.12390835302578855, "grad_norm": 0.1743592470884323, "learning_rate": 0.004954806902218571, "loss": 2.9947, "step": 1206 }, { "epoch": 0.12421658275968354, "grad_norm": 0.29677319526672363, "learning_rate": 0.004967132292522596, "loss": 2.9922, "step": 1209 }, { "epoch": 0.12452481249357855, "grad_norm": 0.273882657289505, "learning_rate": 0.004979457682826623, "loss": 2.9698, "step": 1212 }, { "epoch": 0.12483304222747355, "grad_norm": 0.3060019910335541, "learning_rate": 0.004991783073130649, "loss": 2.9925, "step": 1215 }, { "epoch": 0.12514127196136854, "grad_norm": 0.13856515288352966, "learning_rate": 0.005004108463434675, "loss": 3.0212, "step": 1218 }, { "epoch": 0.12544950169526353, "grad_norm": 0.12940354645252228, "learning_rate": 0.005016433853738702, "loss": 2.9472, "step": 1221 }, { "epoch": 0.12575773142915853, "grad_norm": 0.15493866801261902, "learning_rate": 0.005028759244042728, "loss": 2.9859, "step": 1224 }, { "epoch": 0.12606596116305352, "grad_norm": 0.4994816184043884, "learning_rate": 0.005041084634346754, "loss": 2.949, "step": 1227 }, { "epoch": 0.1263741908969485, "grad_norm": 0.37235137820243835, "learning_rate": 0.005053410024650781, "loss": 3.006, "step": 1230 }, { "epoch": 0.12668242063084353, "grad_norm": 0.24599948525428772, "learning_rate": 0.0050657354149548075, "loss": 2.9954, "step": 1233 }, { "epoch": 0.12699065036473853, "grad_norm": 0.1838703751564026, "learning_rate": 0.005078060805258833, "loss": 2.9886, "step": 1236 }, { "epoch": 0.12729888009863352, "grad_norm": 0.19366377592086792, "learning_rate": 0.005090386195562859, "loss": 2.9715, "step": 1239 }, { "epoch": 0.12760710983252851, "grad_norm": 0.11911759525537491, "learning_rate": 0.005102711585866886, "loss": 2.965, "step": 1242 }, { "epoch": 0.1279153395664235, "grad_norm": 0.12456653267145157, "learning_rate": 0.005115036976170912, "loss": 2.9343, "step": 1245 }, { "epoch": 0.1282235693003185, "grad_norm": 0.322380393743515, "learning_rate": 0.005127362366474938, "loss": 2.9604, "step": 1248 }, { "epoch": 0.1285317990342135, "grad_norm": 0.40975773334503174, "learning_rate": 0.005139687756778965, "loss": 2.9386, "step": 1251 }, { "epoch": 0.1288400287681085, "grad_norm": 0.2045045793056488, "learning_rate": 0.005152013147082991, "loss": 2.9459, "step": 1254 }, { "epoch": 0.12914825850200348, "grad_norm": 0.20005717873573303, "learning_rate": 0.005164338537387018, "loss": 2.9631, "step": 1257 }, { "epoch": 0.12945648823589848, "grad_norm": 0.18930204212665558, "learning_rate": 0.0051766639276910435, "loss": 2.9014, "step": 1260 }, { "epoch": 0.1297647179697935, "grad_norm": 0.3180810213088989, "learning_rate": 0.00518898931799507, "loss": 2.9242, "step": 1263 }, { "epoch": 0.1300729477036885, "grad_norm": 0.17843572795391083, "learning_rate": 0.005201314708299097, "loss": 2.9063, "step": 1266 }, { "epoch": 0.13038117743758348, "grad_norm": 0.12591248750686646, "learning_rate": 0.005213640098603123, "loss": 2.9095, "step": 1269 }, { "epoch": 0.13068940717147848, "grad_norm": 0.17976878583431244, "learning_rate": 0.005225965488907148, "loss": 2.928, "step": 1272 }, { "epoch": 0.13099763690537347, "grad_norm": 0.16759532690048218, "learning_rate": 0.005238290879211175, "loss": 2.9202, "step": 1275 }, { "epoch": 0.13130586663926846, "grad_norm": 0.27441859245300293, "learning_rate": 0.0052506162695152014, "loss": 2.9242, "step": 1278 }, { "epoch": 0.13161409637316346, "grad_norm": 0.23654502630233765, "learning_rate": 0.005262941659819228, "loss": 2.9175, "step": 1281 }, { "epoch": 0.13192232610705845, "grad_norm": 0.3399145007133484, "learning_rate": 0.005275267050123254, "loss": 2.9277, "step": 1284 }, { "epoch": 0.13223055584095345, "grad_norm": 0.199320450425148, "learning_rate": 0.00528759244042728, "loss": 2.9184, "step": 1287 }, { "epoch": 0.13253878557484847, "grad_norm": 0.16563403606414795, "learning_rate": 0.005299917830731307, "loss": 2.9166, "step": 1290 }, { "epoch": 0.13284701530874346, "grad_norm": 0.18119758367538452, "learning_rate": 0.005312243221035334, "loss": 2.9239, "step": 1293 }, { "epoch": 0.13315524504263845, "grad_norm": 0.1558375358581543, "learning_rate": 0.0053245686113393585, "loss": 2.9028, "step": 1296 }, { "epoch": 0.13346347477653345, "grad_norm": 0.36665746569633484, "learning_rate": 0.005336894001643385, "loss": 2.9081, "step": 1299 }, { "epoch": 0.13377170451042844, "grad_norm": 0.186012864112854, "learning_rate": 0.005349219391947412, "loss": 2.8836, "step": 1302 }, { "epoch": 0.13407993424432343, "grad_norm": 0.14102259278297424, "learning_rate": 0.005361544782251438, "loss": 2.8906, "step": 1305 }, { "epoch": 0.13438816397821843, "grad_norm": 0.12519022822380066, "learning_rate": 0.005373870172555464, "loss": 2.9148, "step": 1308 }, { "epoch": 0.13469639371211342, "grad_norm": 0.14027029275894165, "learning_rate": 0.005386195562859491, "loss": 2.9108, "step": 1311 }, { "epoch": 0.13500462344600841, "grad_norm": 0.2553085684776306, "learning_rate": 0.005398520953163517, "loss": 2.8837, "step": 1314 }, { "epoch": 0.1353128531799034, "grad_norm": 0.2809675335884094, "learning_rate": 0.005410846343467544, "loss": 2.8795, "step": 1317 }, { "epoch": 0.13562108291379843, "grad_norm": 0.19451378285884857, "learning_rate": 0.005423171733771569, "loss": 2.8648, "step": 1320 }, { "epoch": 0.13592931264769342, "grad_norm": 0.22285006940364838, "learning_rate": 0.005435497124075595, "loss": 2.8994, "step": 1323 }, { "epoch": 0.13623754238158842, "grad_norm": 0.14703693985939026, "learning_rate": 0.005447822514379622, "loss": 2.8984, "step": 1326 }, { "epoch": 0.1365457721154834, "grad_norm": 0.23260341584682465, "learning_rate": 0.005460147904683649, "loss": 2.863, "step": 1329 }, { "epoch": 0.1368540018493784, "grad_norm": 0.16448146104812622, "learning_rate": 0.005472473294987674, "loss": 2.8895, "step": 1332 }, { "epoch": 0.1371622315832734, "grad_norm": 0.1994483470916748, "learning_rate": 0.005484798685291701, "loss": 2.9012, "step": 1335 }, { "epoch": 0.1374704613171684, "grad_norm": 0.2786753177642822, "learning_rate": 0.0054971240755957275, "loss": 2.8753, "step": 1338 }, { "epoch": 0.13777869105106338, "grad_norm": 0.13169367611408234, "learning_rate": 0.005509449465899754, "loss": 2.8567, "step": 1341 }, { "epoch": 0.13808692078495838, "grad_norm": 0.21205192804336548, "learning_rate": 0.00552177485620378, "loss": 2.8523, "step": 1344 }, { "epoch": 0.1383951505188534, "grad_norm": 0.3462331295013428, "learning_rate": 0.0055341002465078065, "loss": 2.881, "step": 1347 }, { "epoch": 0.1387033802527484, "grad_norm": 0.26768332719802856, "learning_rate": 0.005546425636811832, "loss": 2.8803, "step": 1350 }, { "epoch": 0.1390116099866434, "grad_norm": 0.22518084943294525, "learning_rate": 0.005558751027115859, "loss": 2.874, "step": 1353 }, { "epoch": 0.13931983972053838, "grad_norm": 0.1767919361591339, "learning_rate": 0.005571076417419885, "loss": 2.8593, "step": 1356 }, { "epoch": 0.13962806945443337, "grad_norm": 0.14405187964439392, "learning_rate": 0.005583401807723911, "loss": 2.8576, "step": 1359 }, { "epoch": 0.13993629918832837, "grad_norm": 0.15364724397659302, "learning_rate": 0.005595727198027938, "loss": 2.856, "step": 1362 }, { "epoch": 0.14024452892222336, "grad_norm": 0.26737314462661743, "learning_rate": 0.005608052588331964, "loss": 2.8225, "step": 1365 }, { "epoch": 0.14055275865611835, "grad_norm": 0.14594382047653198, "learning_rate": 0.00562037797863599, "loss": 2.8397, "step": 1368 }, { "epoch": 0.14086098839001335, "grad_norm": 0.1974790245294571, "learning_rate": 0.005632703368940017, "loss": 2.8294, "step": 1371 }, { "epoch": 0.14116921812390834, "grad_norm": 0.12267682701349258, "learning_rate": 0.005645028759244043, "loss": 2.8543, "step": 1374 }, { "epoch": 0.14147744785780336, "grad_norm": 0.14111129939556122, "learning_rate": 0.00565735414954807, "loss": 2.8181, "step": 1377 }, { "epoch": 0.14178567759169836, "grad_norm": 0.1846015751361847, "learning_rate": 0.005669679539852095, "loss": 2.8272, "step": 1380 }, { "epoch": 0.14209390732559335, "grad_norm": 0.26931676268577576, "learning_rate": 0.0056820049301561214, "loss": 2.8286, "step": 1383 }, { "epoch": 0.14240213705948834, "grad_norm": 0.17969557642936707, "learning_rate": 0.005694330320460148, "loss": 2.8315, "step": 1386 }, { "epoch": 0.14271036679338334, "grad_norm": 0.2056432068347931, "learning_rate": 0.005706655710764175, "loss": 2.835, "step": 1389 }, { "epoch": 0.14301859652727833, "grad_norm": 0.29306477308273315, "learning_rate": 0.0057189811010682, "loss": 2.8294, "step": 1392 }, { "epoch": 0.14332682626117332, "grad_norm": 0.1792561262845993, "learning_rate": 0.005731306491372227, "loss": 2.8321, "step": 1395 }, { "epoch": 0.14363505599506832, "grad_norm": 0.11323501914739609, "learning_rate": 0.005743631881676254, "loss": 2.83, "step": 1398 }, { "epoch": 0.1439432857289633, "grad_norm": 0.2804841101169586, "learning_rate": 0.00575595727198028, "loss": 2.8271, "step": 1401 }, { "epoch": 0.1442515154628583, "grad_norm": 0.33056163787841797, "learning_rate": 0.005768282662284305, "loss": 2.7976, "step": 1404 }, { "epoch": 0.14455974519675333, "grad_norm": 0.12834665179252625, "learning_rate": 0.005780608052588332, "loss": 2.8169, "step": 1407 }, { "epoch": 0.14486797493064832, "grad_norm": 0.15917035937309265, "learning_rate": 0.005792933442892358, "loss": 2.8124, "step": 1410 }, { "epoch": 0.1451762046645433, "grad_norm": 0.28015008568763733, "learning_rate": 0.005805258833196385, "loss": 2.8019, "step": 1413 }, { "epoch": 0.1454844343984383, "grad_norm": 0.16829009354114532, "learning_rate": 0.005817584223500411, "loss": 2.8357, "step": 1416 }, { "epoch": 0.1457926641323333, "grad_norm": 0.14804339408874512, "learning_rate": 0.005829909613804437, "loss": 2.8102, "step": 1419 }, { "epoch": 0.1461008938662283, "grad_norm": 0.20360830426216125, "learning_rate": 0.005842235004108464, "loss": 2.8211, "step": 1422 }, { "epoch": 0.1464091236001233, "grad_norm": 0.22152036428451538, "learning_rate": 0.0058545603944124905, "loss": 2.8103, "step": 1425 }, { "epoch": 0.14671735333401828, "grad_norm": 0.20746375620365143, "learning_rate": 0.005866885784716516, "loss": 2.7994, "step": 1428 }, { "epoch": 0.14702558306791327, "grad_norm": 0.16845661401748657, "learning_rate": 0.005879211175020542, "loss": 2.8286, "step": 1431 }, { "epoch": 0.1473338128018083, "grad_norm": 0.1094370111823082, "learning_rate": 0.005891536565324569, "loss": 2.7888, "step": 1434 }, { "epoch": 0.1476420425357033, "grad_norm": 0.14844520390033722, "learning_rate": 0.005903861955628595, "loss": 2.8035, "step": 1437 }, { "epoch": 0.14795027226959828, "grad_norm": 0.12289691716432571, "learning_rate": 0.005916187345932621, "loss": 2.7852, "step": 1440 }, { "epoch": 0.14825850200349328, "grad_norm": 0.1203322485089302, "learning_rate": 0.0059285127362366475, "loss": 2.8101, "step": 1443 }, { "epoch": 0.14856673173738827, "grad_norm": 0.1871965080499649, "learning_rate": 0.005940838126540674, "loss": 2.7485, "step": 1446 }, { "epoch": 0.14887496147128326, "grad_norm": 0.1567300707101822, "learning_rate": 0.005953163516844701, "loss": 2.8097, "step": 1449 }, { "epoch": 0.14918319120517826, "grad_norm": 0.18046674132347107, "learning_rate": 0.0059654889071487265, "loss": 2.8118, "step": 1452 }, { "epoch": 0.14949142093907325, "grad_norm": 0.23180244863033295, "learning_rate": 0.005977814297452753, "loss": 2.7836, "step": 1455 }, { "epoch": 0.14979965067296824, "grad_norm": 0.2300175577402115, "learning_rate": 0.00599013968775678, "loss": 2.7675, "step": 1458 }, { "epoch": 0.15010788040686324, "grad_norm": 0.11340396106243134, "learning_rate": 0.006002465078060806, "loss": 2.8012, "step": 1461 }, { "epoch": 0.15041611014075826, "grad_norm": 0.10667074471712112, "learning_rate": 0.006014790468364831, "loss": 2.8154, "step": 1464 }, { "epoch": 0.15072433987465325, "grad_norm": 0.10800652205944061, "learning_rate": 0.006027115858668858, "loss": 2.7646, "step": 1467 }, { "epoch": 0.15103256960854824, "grad_norm": 0.2588643431663513, "learning_rate": 0.006039441248972884, "loss": 2.7912, "step": 1470 }, { "epoch": 0.15134079934244324, "grad_norm": 0.32462435960769653, "learning_rate": 0.006051766639276911, "loss": 2.7666, "step": 1473 }, { "epoch": 0.15164902907633823, "grad_norm": 0.23754975199699402, "learning_rate": 0.006064092029580937, "loss": 2.7694, "step": 1476 }, { "epoch": 0.15195725881023323, "grad_norm": 0.14895015954971313, "learning_rate": 0.006076417419884963, "loss": 2.7678, "step": 1479 }, { "epoch": 0.15226548854412822, "grad_norm": 0.3228299021720886, "learning_rate": 0.00608874281018899, "loss": 2.7786, "step": 1482 }, { "epoch": 0.1525737182780232, "grad_norm": 0.15597562491893768, "learning_rate": 0.006101068200493017, "loss": 2.7967, "step": 1485 }, { "epoch": 0.1528819480119182, "grad_norm": 0.09748488664627075, "learning_rate": 0.0061133935907970414, "loss": 2.7673, "step": 1488 }, { "epoch": 0.1531901777458132, "grad_norm": 0.12523339688777924, "learning_rate": 0.006125718981101068, "loss": 2.7391, "step": 1491 }, { "epoch": 0.15349840747970822, "grad_norm": 0.16529253125190735, "learning_rate": 0.006138044371405095, "loss": 2.7642, "step": 1494 }, { "epoch": 0.15380663721360321, "grad_norm": 0.2083311527967453, "learning_rate": 0.006150369761709121, "loss": 2.764, "step": 1497 }, { "epoch": 0.1541148669474982, "grad_norm": 0.13263079524040222, "learning_rate": 0.006162695152013147, "loss": 2.7828, "step": 1500 }, { "epoch": 0.1544230966813932, "grad_norm": 0.1473417580127716, "learning_rate": 0.006175020542317174, "loss": 2.7574, "step": 1503 }, { "epoch": 0.1547313264152882, "grad_norm": 0.22629734873771667, "learning_rate": 0.0061873459326212, "loss": 2.7792, "step": 1506 }, { "epoch": 0.1550395561491832, "grad_norm": 0.21652548015117645, "learning_rate": 0.006199671322925227, "loss": 2.7785, "step": 1509 }, { "epoch": 0.15534778588307818, "grad_norm": 0.1948641836643219, "learning_rate": 0.006211996713229252, "loss": 2.7969, "step": 1512 }, { "epoch": 0.15565601561697318, "grad_norm": 0.13890105485916138, "learning_rate": 0.006224322103533278, "loss": 2.7856, "step": 1515 }, { "epoch": 0.15596424535086817, "grad_norm": 0.09859870374202728, "learning_rate": 0.006236647493837305, "loss": 2.7523, "step": 1518 }, { "epoch": 0.1562724750847632, "grad_norm": 0.10258977860212326, "learning_rate": 0.0062489728841413315, "loss": 2.7466, "step": 1521 }, { "epoch": 0.15658070481865818, "grad_norm": 0.11476584523916245, "learning_rate": 0.006261298274445357, "loss": 2.7314, "step": 1524 }, { "epoch": 0.15688893455255318, "grad_norm": 0.1920320987701416, "learning_rate": 0.006273623664749384, "loss": 2.7647, "step": 1527 }, { "epoch": 0.15719716428644817, "grad_norm": 0.18576020002365112, "learning_rate": 0.0062859490550534105, "loss": 2.7632, "step": 1530 }, { "epoch": 0.15750539402034316, "grad_norm": 0.128046452999115, "learning_rate": 0.006298274445357437, "loss": 2.7237, "step": 1533 }, { "epoch": 0.15781362375423816, "grad_norm": 0.30617430806159973, "learning_rate": 0.006310599835661463, "loss": 2.7907, "step": 1536 }, { "epoch": 0.15812185348813315, "grad_norm": 0.140928253531456, "learning_rate": 0.0063229252259654894, "loss": 2.7879, "step": 1539 }, { "epoch": 0.15843008322202815, "grad_norm": 0.2537645399570465, "learning_rate": 0.006335250616269515, "loss": 2.7513, "step": 1542 }, { "epoch": 0.15873831295592314, "grad_norm": 0.40944191813468933, "learning_rate": 0.006347576006573542, "loss": 2.7418, "step": 1545 }, { "epoch": 0.15904654268981813, "grad_norm": 0.1284068077802658, "learning_rate": 0.0063599013968775675, "loss": 2.7235, "step": 1548 }, { "epoch": 0.15935477242371315, "grad_norm": 0.08984164893627167, "learning_rate": 0.006372226787181594, "loss": 2.7414, "step": 1551 }, { "epoch": 0.15966300215760815, "grad_norm": 0.13366155326366425, "learning_rate": 0.006384552177485621, "loss": 2.7456, "step": 1554 }, { "epoch": 0.15997123189150314, "grad_norm": 0.1179983913898468, "learning_rate": 0.006396877567789647, "loss": 2.7313, "step": 1557 }, { "epoch": 0.16027946162539813, "grad_norm": 0.15718503296375275, "learning_rate": 0.006409202958093673, "loss": 2.7315, "step": 1560 }, { "epoch": 0.16058769135929313, "grad_norm": 0.14405110478401184, "learning_rate": 0.0064215283483977, "loss": 2.7275, "step": 1563 }, { "epoch": 0.16089592109318812, "grad_norm": 0.13050544261932373, "learning_rate": 0.006433853738701726, "loss": 2.6935, "step": 1566 }, { "epoch": 0.16120415082708311, "grad_norm": 0.2343079298734665, "learning_rate": 0.006446179129005751, "loss": 2.6932, "step": 1569 }, { "epoch": 0.1615123805609781, "grad_norm": 0.2493698000907898, "learning_rate": 0.006458504519309778, "loss": 2.7414, "step": 1572 }, { "epoch": 0.1618206102948731, "grad_norm": 0.17371931672096252, "learning_rate": 0.006470829909613804, "loss": 2.7522, "step": 1575 }, { "epoch": 0.1621288400287681, "grad_norm": 0.16282691061496735, "learning_rate": 0.006483155299917831, "loss": 2.7659, "step": 1578 }, { "epoch": 0.16243706976266312, "grad_norm": 0.12791027128696442, "learning_rate": 0.006495480690221857, "loss": 2.7077, "step": 1581 }, { "epoch": 0.1627452994965581, "grad_norm": 0.09789251536130905, "learning_rate": 0.006507806080525883, "loss": 2.7041, "step": 1584 }, { "epoch": 0.1630535292304531, "grad_norm": 0.10156393051147461, "learning_rate": 0.00652013147082991, "loss": 2.685, "step": 1587 }, { "epoch": 0.1633617589643481, "grad_norm": 0.1974211484193802, "learning_rate": 0.006532456861133937, "loss": 2.7183, "step": 1590 }, { "epoch": 0.1636699886982431, "grad_norm": 0.1420728713274002, "learning_rate": 0.0065447822514379615, "loss": 2.7095, "step": 1593 }, { "epoch": 0.16397821843213808, "grad_norm": 0.3637617528438568, "learning_rate": 0.006557107641741988, "loss": 2.7578, "step": 1596 }, { "epoch": 0.16428644816603308, "grad_norm": 0.09830935299396515, "learning_rate": 0.006569433032046015, "loss": 2.6937, "step": 1599 }, { "epoch": 0.16459467789992807, "grad_norm": 0.15821218490600586, "learning_rate": 0.006581758422350041, "loss": 2.7031, "step": 1602 }, { "epoch": 0.16490290763382306, "grad_norm": 0.17226357758045197, "learning_rate": 0.006594083812654067, "loss": 2.6702, "step": 1605 }, { "epoch": 0.16521113736771809, "grad_norm": 0.21252015233039856, "learning_rate": 0.006606409202958094, "loss": 2.6893, "step": 1608 }, { "epoch": 0.16551936710161308, "grad_norm": 0.11433108150959015, "learning_rate": 0.00661873459326212, "loss": 2.6852, "step": 1611 }, { "epoch": 0.16582759683550807, "grad_norm": 0.15884144604206085, "learning_rate": 0.006631059983566147, "loss": 2.7164, "step": 1614 }, { "epoch": 0.16613582656940307, "grad_norm": 0.1429038643836975, "learning_rate": 0.006643385373870173, "loss": 2.6976, "step": 1617 }, { "epoch": 0.16644405630329806, "grad_norm": 0.09187953919172287, "learning_rate": 0.006655710764174198, "loss": 2.7134, "step": 1620 }, { "epoch": 0.16675228603719305, "grad_norm": 0.13670755922794342, "learning_rate": 0.006668036154478225, "loss": 2.6951, "step": 1623 }, { "epoch": 0.16706051577108805, "grad_norm": 0.17965632677078247, "learning_rate": 0.0066803615447822515, "loss": 2.6911, "step": 1626 }, { "epoch": 0.16736874550498304, "grad_norm": 0.21141032874584198, "learning_rate": 0.006692686935086277, "loss": 2.67, "step": 1629 }, { "epoch": 0.16767697523887803, "grad_norm": 0.30064719915390015, "learning_rate": 0.006705012325390304, "loss": 2.6837, "step": 1632 }, { "epoch": 0.16798520497277303, "grad_norm": 0.11874115467071533, "learning_rate": 0.0067173377156943305, "loss": 2.6968, "step": 1635 }, { "epoch": 0.16829343470666805, "grad_norm": 0.10265806317329407, "learning_rate": 0.006729663105998357, "loss": 2.6632, "step": 1638 }, { "epoch": 0.16860166444056304, "grad_norm": 0.10916320979595184, "learning_rate": 0.006741988496302383, "loss": 2.6749, "step": 1641 }, { "epoch": 0.16890989417445804, "grad_norm": 0.2549231946468353, "learning_rate": 0.0067543138866064095, "loss": 2.636, "step": 1644 }, { "epoch": 0.16921812390835303, "grad_norm": 0.15071339905261993, "learning_rate": 0.006766639276910436, "loss": 2.6933, "step": 1647 }, { "epoch": 0.16952635364224802, "grad_norm": 0.1088666021823883, "learning_rate": 0.006778964667214463, "loss": 2.6477, "step": 1650 }, { "epoch": 0.16983458337614302, "grad_norm": 0.0984036773443222, "learning_rate": 0.0067912900575184875, "loss": 2.6801, "step": 1653 }, { "epoch": 0.170142813110038, "grad_norm": 0.15402089059352875, "learning_rate": 0.006803615447822514, "loss": 2.6877, "step": 1656 }, { "epoch": 0.170451042843933, "grad_norm": 0.1299775093793869, "learning_rate": 0.006815940838126541, "loss": 2.6717, "step": 1659 }, { "epoch": 0.170759272577828, "grad_norm": 0.15615323185920715, "learning_rate": 0.006828266228430567, "loss": 2.6578, "step": 1662 }, { "epoch": 0.171067502311723, "grad_norm": 0.122567318379879, "learning_rate": 0.006840591618734593, "loss": 2.6959, "step": 1665 }, { "epoch": 0.171375732045618, "grad_norm": 0.1386043280363083, "learning_rate": 0.00685291700903862, "loss": 2.6491, "step": 1668 }, { "epoch": 0.171683961779513, "grad_norm": 0.1900375783443451, "learning_rate": 0.006865242399342646, "loss": 2.6643, "step": 1671 }, { "epoch": 0.171992191513408, "grad_norm": 0.1118064671754837, "learning_rate": 0.006877567789646673, "loss": 2.6496, "step": 1674 }, { "epoch": 0.172300421247303, "grad_norm": 0.1593448519706726, "learning_rate": 0.006889893179950698, "loss": 2.6833, "step": 1677 }, { "epoch": 0.172608650981198, "grad_norm": 0.17275281250476837, "learning_rate": 0.006902218570254724, "loss": 2.6909, "step": 1680 }, { "epoch": 0.17291688071509298, "grad_norm": 0.13396479189395905, "learning_rate": 0.006914543960558751, "loss": 2.692, "step": 1683 }, { "epoch": 0.17322511044898797, "grad_norm": 0.09812068939208984, "learning_rate": 0.006926869350862778, "loss": 2.6939, "step": 1686 }, { "epoch": 0.17353334018288297, "grad_norm": 0.08181022852659225, "learning_rate": 0.006939194741166803, "loss": 2.6408, "step": 1689 }, { "epoch": 0.17384156991677796, "grad_norm": 0.15573051571846008, "learning_rate": 0.00695152013147083, "loss": 2.6647, "step": 1692 }, { "epoch": 0.17414979965067298, "grad_norm": 0.2834240198135376, "learning_rate": 0.006963845521774857, "loss": 2.6585, "step": 1695 }, { "epoch": 0.17445802938456798, "grad_norm": 0.23794801533222198, "learning_rate": 0.006976170912078883, "loss": 2.6559, "step": 1698 }, { "epoch": 0.17476625911846297, "grad_norm": 0.1332167536020279, "learning_rate": 0.006988496302382908, "loss": 2.6695, "step": 1701 }, { "epoch": 0.17507448885235796, "grad_norm": 0.09555593878030777, "learning_rate": 0.007000821692686935, "loss": 2.6811, "step": 1704 }, { "epoch": 0.17538271858625296, "grad_norm": 0.10987939685583115, "learning_rate": 0.007013147082990961, "loss": 2.6524, "step": 1707 }, { "epoch": 0.17569094832014795, "grad_norm": 0.11458218097686768, "learning_rate": 0.007025472473294988, "loss": 2.6085, "step": 1710 }, { "epoch": 0.17599917805404294, "grad_norm": 0.12646709382534027, "learning_rate": 0.007037797863599014, "loss": 2.6561, "step": 1713 }, { "epoch": 0.17630740778793794, "grad_norm": 0.15338967740535736, "learning_rate": 0.00705012325390304, "loss": 2.6471, "step": 1716 }, { "epoch": 0.17661563752183293, "grad_norm": 0.14660318195819855, "learning_rate": 0.007062448644207067, "loss": 2.6532, "step": 1719 }, { "epoch": 0.17692386725572792, "grad_norm": 0.2730877995491028, "learning_rate": 0.0070747740345110935, "loss": 2.6565, "step": 1722 }, { "epoch": 0.17723209698962294, "grad_norm": 0.26743727922439575, "learning_rate": 0.007087099424815119, "loss": 2.6707, "step": 1725 }, { "epoch": 0.17754032672351794, "grad_norm": 0.13842618465423584, "learning_rate": 0.007099424815119146, "loss": 2.6652, "step": 1728 }, { "epoch": 0.17784855645741293, "grad_norm": 0.15871621668338776, "learning_rate": 0.0071117502054231715, "loss": 2.6464, "step": 1731 }, { "epoch": 0.17815678619130793, "grad_norm": 0.11526347696781158, "learning_rate": 0.007124075595727198, "loss": 2.662, "step": 1734 }, { "epoch": 0.17846501592520292, "grad_norm": 0.21620534360408783, "learning_rate": 0.007136400986031224, "loss": 2.6603, "step": 1737 }, { "epoch": 0.1787732456590979, "grad_norm": 0.0905444398522377, "learning_rate": 0.0071487263763352505, "loss": 2.6523, "step": 1740 }, { "epoch": 0.1790814753929929, "grad_norm": 0.28233054280281067, "learning_rate": 0.007161051766639277, "loss": 2.6597, "step": 1743 }, { "epoch": 0.1793897051268879, "grad_norm": 0.2363336831331253, "learning_rate": 0.007173377156943304, "loss": 2.6483, "step": 1746 }, { "epoch": 0.1796979348607829, "grad_norm": 0.11012139916419983, "learning_rate": 0.0071857025472473295, "loss": 2.6513, "step": 1749 }, { "epoch": 0.1800061645946779, "grad_norm": 0.09720948338508606, "learning_rate": 0.007198027937551356, "loss": 2.6511, "step": 1752 }, { "epoch": 0.1803143943285729, "grad_norm": 0.13130852580070496, "learning_rate": 0.007210353327855383, "loss": 2.6509, "step": 1755 }, { "epoch": 0.1806226240624679, "grad_norm": 0.14865098893642426, "learning_rate": 0.007222678718159409, "loss": 2.6253, "step": 1758 }, { "epoch": 0.1809308537963629, "grad_norm": 0.20482710003852844, "learning_rate": 0.007235004108463434, "loss": 2.6312, "step": 1761 }, { "epoch": 0.1812390835302579, "grad_norm": 0.12063097953796387, "learning_rate": 0.007247329498767461, "loss": 2.6007, "step": 1764 }, { "epoch": 0.18154731326415288, "grad_norm": 0.23084934055805206, "learning_rate": 0.007259654889071487, "loss": 2.6129, "step": 1767 }, { "epoch": 0.18185554299804788, "grad_norm": 0.10387217253446579, "learning_rate": 0.007271980279375514, "loss": 2.6309, "step": 1770 }, { "epoch": 0.18216377273194287, "grad_norm": 0.14229682087898254, "learning_rate": 0.00728430566967954, "loss": 2.6074, "step": 1773 }, { "epoch": 0.18247200246583786, "grad_norm": 0.12009115517139435, "learning_rate": 0.007296631059983566, "loss": 2.6407, "step": 1776 }, { "epoch": 0.18278023219973286, "grad_norm": 0.15677185356616974, "learning_rate": 0.007308956450287593, "loss": 2.6268, "step": 1779 }, { "epoch": 0.18308846193362788, "grad_norm": 0.13304303586483002, "learning_rate": 0.0073212818405916195, "loss": 2.6463, "step": 1782 }, { "epoch": 0.18339669166752287, "grad_norm": 0.15444768965244293, "learning_rate": 0.007333607230895644, "loss": 2.6218, "step": 1785 }, { "epoch": 0.18370492140141786, "grad_norm": 0.1738140732049942, "learning_rate": 0.007345932621199671, "loss": 2.6525, "step": 1788 }, { "epoch": 0.18401315113531286, "grad_norm": 0.13087227940559387, "learning_rate": 0.007358258011503698, "loss": 2.6266, "step": 1791 }, { "epoch": 0.18432138086920785, "grad_norm": 0.1026511862874031, "learning_rate": 0.007370583401807724, "loss": 2.6017, "step": 1794 }, { "epoch": 0.18462961060310285, "grad_norm": 0.11183813214302063, "learning_rate": 0.00738290879211175, "loss": 2.5966, "step": 1797 }, { "epoch": 0.18493784033699784, "grad_norm": 0.12239934504032135, "learning_rate": 0.007395234182415777, "loss": 2.6205, "step": 1800 }, { "epoch": 0.18524607007089283, "grad_norm": 0.2630854845046997, "learning_rate": 0.007407559572719803, "loss": 2.609, "step": 1803 }, { "epoch": 0.18555429980478783, "grad_norm": 0.24282613396644592, "learning_rate": 0.00741988496302383, "loss": 2.6405, "step": 1806 }, { "epoch": 0.18586252953868282, "grad_norm": 0.2825084328651428, "learning_rate": 0.007432210353327855, "loss": 2.5933, "step": 1809 }, { "epoch": 0.18617075927257784, "grad_norm": 0.26462721824645996, "learning_rate": 0.007444535743631881, "loss": 2.6021, "step": 1812 }, { "epoch": 0.18647898900647283, "grad_norm": 0.11797992140054703, "learning_rate": 0.007456861133935908, "loss": 2.6246, "step": 1815 }, { "epoch": 0.18678721874036783, "grad_norm": 0.14044708013534546, "learning_rate": 0.0074691865242399345, "loss": 2.6028, "step": 1818 }, { "epoch": 0.18709544847426282, "grad_norm": 0.1374548226594925, "learning_rate": 0.00748151191454396, "loss": 2.6092, "step": 1821 }, { "epoch": 0.18740367820815781, "grad_norm": 0.10084279626607895, "learning_rate": 0.007493837304847987, "loss": 2.6162, "step": 1824 }, { "epoch": 0.1877119079420528, "grad_norm": 0.1052001565694809, "learning_rate": 0.0075061626951520135, "loss": 2.5742, "step": 1827 }, { "epoch": 0.1880201376759478, "grad_norm": 0.11738535761833191, "learning_rate": 0.00751848808545604, "loss": 2.5715, "step": 1830 }, { "epoch": 0.1883283674098428, "grad_norm": 0.10453224182128906, "learning_rate": 0.007530813475760066, "loss": 2.5896, "step": 1833 }, { "epoch": 0.1886365971437378, "grad_norm": 0.10509374737739563, "learning_rate": 0.007543138866064092, "loss": 2.6047, "step": 1836 }, { "epoch": 0.18894482687763278, "grad_norm": 0.11291799694299698, "learning_rate": 0.007555464256368119, "loss": 2.6062, "step": 1839 }, { "epoch": 0.1892530566115278, "grad_norm": 0.11998583376407623, "learning_rate": 0.007567789646672146, "loss": 2.629, "step": 1842 }, { "epoch": 0.1895612863454228, "grad_norm": 0.21776226162910461, "learning_rate": 0.0075801150369761705, "loss": 2.5847, "step": 1845 }, { "epoch": 0.1898695160793178, "grad_norm": 0.210985466837883, "learning_rate": 0.007592440427280197, "loss": 2.5901, "step": 1848 }, { "epoch": 0.19017774581321278, "grad_norm": 0.11799308657646179, "learning_rate": 0.007604765817584224, "loss": 2.5893, "step": 1851 }, { "epoch": 0.19048597554710778, "grad_norm": 0.10019934922456741, "learning_rate": 0.00761709120788825, "loss": 2.6327, "step": 1854 }, { "epoch": 0.19079420528100277, "grad_norm": 0.07964596897363663, "learning_rate": 0.007629416598192276, "loss": 2.5921, "step": 1857 }, { "epoch": 0.19110243501489776, "grad_norm": 0.16393065452575684, "learning_rate": 0.007641741988496303, "loss": 2.5912, "step": 1860 }, { "epoch": 0.19141066474879276, "grad_norm": 0.324639230966568, "learning_rate": 0.007654067378800329, "loss": 2.5998, "step": 1863 }, { "epoch": 0.19171889448268775, "grad_norm": 0.14071421325206757, "learning_rate": 0.007666392769104356, "loss": 2.5803, "step": 1866 }, { "epoch": 0.19202712421658277, "grad_norm": 0.20063026249408722, "learning_rate": 0.007678718159408381, "loss": 2.6019, "step": 1869 }, { "epoch": 0.19233535395047777, "grad_norm": 0.11311519891023636, "learning_rate": 0.007691043549712407, "loss": 2.5645, "step": 1872 }, { "epoch": 0.19264358368437276, "grad_norm": 0.08542342483997345, "learning_rate": 0.007703368940016434, "loss": 2.6122, "step": 1875 }, { "epoch": 0.19295181341826775, "grad_norm": 0.08306868374347687, "learning_rate": 0.007715694330320461, "loss": 2.5859, "step": 1878 }, { "epoch": 0.19326004315216275, "grad_norm": 0.11635984480381012, "learning_rate": 0.007728019720624486, "loss": 2.5855, "step": 1881 }, { "epoch": 0.19356827288605774, "grad_norm": 0.08945252746343613, "learning_rate": 0.007740345110928513, "loss": 2.5509, "step": 1884 }, { "epoch": 0.19387650261995273, "grad_norm": 0.19044962525367737, "learning_rate": 0.0077526705012325395, "loss": 2.559, "step": 1887 }, { "epoch": 0.19418473235384773, "grad_norm": 0.1462780088186264, "learning_rate": 0.007764995891536566, "loss": 2.5749, "step": 1890 }, { "epoch": 0.19449296208774272, "grad_norm": 0.15944691002368927, "learning_rate": 0.007777321281840591, "loss": 2.5801, "step": 1893 }, { "epoch": 0.19480119182163771, "grad_norm": 0.10125305503606796, "learning_rate": 0.007789646672144618, "loss": 2.5821, "step": 1896 }, { "epoch": 0.19510942155553274, "grad_norm": 0.17344938218593597, "learning_rate": 0.007801972062448644, "loss": 2.5905, "step": 1899 }, { "epoch": 0.19541765128942773, "grad_norm": 0.16651591658592224, "learning_rate": 0.007814297452752672, "loss": 2.5668, "step": 1902 }, { "epoch": 0.19572588102332272, "grad_norm": 0.17417702078819275, "learning_rate": 0.007826622843056696, "loss": 2.568, "step": 1905 }, { "epoch": 0.19603411075721772, "grad_norm": 0.11182334274053574, "learning_rate": 0.007838948233360723, "loss": 2.5547, "step": 1908 }, { "epoch": 0.1963423404911127, "grad_norm": 0.23256631195545197, "learning_rate": 0.007851273623664749, "loss": 2.5722, "step": 1911 }, { "epoch": 0.1966505702250077, "grad_norm": 0.18180392682552338, "learning_rate": 0.007863599013968776, "loss": 2.558, "step": 1914 }, { "epoch": 0.1969587999589027, "grad_norm": 0.12168890237808228, "learning_rate": 0.007875924404272802, "loss": 2.5977, "step": 1917 }, { "epoch": 0.1972670296927977, "grad_norm": 0.11032187938690186, "learning_rate": 0.007888249794576828, "loss": 2.5846, "step": 1920 }, { "epoch": 0.19757525942669268, "grad_norm": 0.0740116760134697, "learning_rate": 0.007900575184880855, "loss": 2.5824, "step": 1923 }, { "epoch": 0.19788348916058768, "grad_norm": 0.05902474746108055, "learning_rate": 0.007912900575184881, "loss": 2.5497, "step": 1926 }, { "epoch": 0.1981917188944827, "grad_norm": 0.09003309905529022, "learning_rate": 0.007925225965488907, "loss": 2.5523, "step": 1929 }, { "epoch": 0.1984999486283777, "grad_norm": 0.4191035330295563, "learning_rate": 0.007937551355792934, "loss": 2.6223, "step": 1932 }, { "epoch": 0.1988081783622727, "grad_norm": 0.17093214392662048, "learning_rate": 0.00794987674609696, "loss": 2.5647, "step": 1935 }, { "epoch": 0.19911640809616768, "grad_norm": 0.0921127051115036, "learning_rate": 0.007962202136400986, "loss": 2.564, "step": 1938 }, { "epoch": 0.19942463783006267, "grad_norm": 0.14204134047031403, "learning_rate": 0.007974527526705012, "loss": 2.5972, "step": 1941 }, { "epoch": 0.19973286756395767, "grad_norm": 0.07556895911693573, "learning_rate": 0.007986852917009039, "loss": 2.5796, "step": 1944 }, { "epoch": 0.20004109729785266, "grad_norm": 0.07290320843458176, "learning_rate": 0.007999178307313065, "loss": 2.5564, "step": 1947 }, { "epoch": 0.20034932703174765, "grad_norm": 0.1624913364648819, "learning_rate": 0.008011503697617092, "loss": 2.5849, "step": 1950 }, { "epoch": 0.20065755676564265, "grad_norm": 0.11839967221021652, "learning_rate": 0.008023829087921118, "loss": 2.5611, "step": 1953 }, { "epoch": 0.20096578649953767, "grad_norm": 0.14280788600444794, "learning_rate": 0.008036154478225144, "loss": 2.5289, "step": 1956 }, { "epoch": 0.20127401623343266, "grad_norm": 0.11515247821807861, "learning_rate": 0.008048479868529171, "loss": 2.5678, "step": 1959 }, { "epoch": 0.20158224596732766, "grad_norm": 0.1147715225815773, "learning_rate": 0.008060805258833197, "loss": 2.5452, "step": 1962 }, { "epoch": 0.20189047570122265, "grad_norm": 0.09767001122236252, "learning_rate": 0.008073130649137223, "loss": 2.6023, "step": 1965 }, { "epoch": 0.20219870543511764, "grad_norm": 0.0866391509771347, "learning_rate": 0.008085456039441248, "loss": 2.5518, "step": 1968 }, { "epoch": 0.20250693516901264, "grad_norm": 0.1610632985830307, "learning_rate": 0.008097781429745276, "loss": 2.5271, "step": 1971 }, { "epoch": 0.20281516490290763, "grad_norm": 0.20238341391086578, "learning_rate": 0.008110106820049302, "loss": 2.5597, "step": 1974 }, { "epoch": 0.20312339463680262, "grad_norm": 0.11807162314653397, "learning_rate": 0.008122432210353327, "loss": 2.5663, "step": 1977 }, { "epoch": 0.20343162437069762, "grad_norm": 0.14654900133609772, "learning_rate": 0.008134757600657355, "loss": 2.5729, "step": 1980 }, { "epoch": 0.2037398541045926, "grad_norm": 0.17804567515850067, "learning_rate": 0.00814708299096138, "loss": 2.5658, "step": 1983 }, { "epoch": 0.20404808383848763, "grad_norm": 0.12376303225755692, "learning_rate": 0.008159408381265408, "loss": 2.5703, "step": 1986 }, { "epoch": 0.20435631357238263, "grad_norm": 0.1248418316245079, "learning_rate": 0.008171733771569432, "loss": 2.5328, "step": 1989 }, { "epoch": 0.20466454330627762, "grad_norm": 0.08159278333187103, "learning_rate": 0.00818405916187346, "loss": 2.5349, "step": 1992 }, { "epoch": 0.2049727730401726, "grad_norm": 0.11184779554605484, "learning_rate": 0.008196384552177485, "loss": 2.5557, "step": 1995 }, { "epoch": 0.2052810027740676, "grad_norm": 0.09568610787391663, "learning_rate": 0.008208709942481513, "loss": 2.5415, "step": 1998 }, { "epoch": 0.2055892325079626, "grad_norm": 0.08708583563566208, "learning_rate": 0.008221035332785539, "loss": 2.5369, "step": 2001 }, { "epoch": 0.2058974622418576, "grad_norm": 0.11849135160446167, "learning_rate": 0.008233360723089564, "loss": 2.5617, "step": 2004 }, { "epoch": 0.2062056919757526, "grad_norm": 0.1407340168952942, "learning_rate": 0.008245686113393592, "loss": 2.5374, "step": 2007 }, { "epoch": 0.20651392170964758, "grad_norm": 0.13198955357074738, "learning_rate": 0.008258011503697617, "loss": 2.57, "step": 2010 }, { "epoch": 0.20682215144354257, "grad_norm": 0.12408044934272766, "learning_rate": 0.008270336894001643, "loss": 2.5344, "step": 2013 }, { "epoch": 0.2071303811774376, "grad_norm": 0.149169921875, "learning_rate": 0.008282662284305669, "loss": 2.5357, "step": 2016 }, { "epoch": 0.2074386109113326, "grad_norm": 0.10010293871164322, "learning_rate": 0.008294987674609696, "loss": 2.5166, "step": 2019 }, { "epoch": 0.20774684064522758, "grad_norm": 0.17650344967842102, "learning_rate": 0.008307313064913722, "loss": 2.5664, "step": 2022 }, { "epoch": 0.20805507037912258, "grad_norm": 0.09946206212043762, "learning_rate": 0.008319638455217748, "loss": 2.5378, "step": 2025 }, { "epoch": 0.20836330011301757, "grad_norm": 0.07705225795507431, "learning_rate": 0.008331963845521775, "loss": 2.5088, "step": 2028 }, { "epoch": 0.20867152984691256, "grad_norm": 0.18174925446510315, "learning_rate": 0.008344289235825801, "loss": 2.5264, "step": 2031 }, { "epoch": 0.20897975958080756, "grad_norm": 0.14415894448757172, "learning_rate": 0.008356614626129829, "loss": 2.5549, "step": 2034 }, { "epoch": 0.20928798931470255, "grad_norm": 0.17721933126449585, "learning_rate": 0.008368940016433854, "loss": 2.5476, "step": 2037 }, { "epoch": 0.20959621904859754, "grad_norm": 0.1727544367313385, "learning_rate": 0.00838126540673788, "loss": 2.5809, "step": 2040 }, { "epoch": 0.20990444878249256, "grad_norm": 0.20624054968357086, "learning_rate": 0.008393590797041908, "loss": 2.5256, "step": 2043 }, { "epoch": 0.21021267851638756, "grad_norm": 0.08070924133062363, "learning_rate": 0.008405916187345933, "loss": 2.5537, "step": 2046 }, { "epoch": 0.21052090825028255, "grad_norm": 0.07868220657110214, "learning_rate": 0.008418241577649959, "loss": 2.5266, "step": 2049 }, { "epoch": 0.21082913798417754, "grad_norm": 0.19941876828670502, "learning_rate": 0.008430566967953985, "loss": 2.5344, "step": 2052 }, { "epoch": 0.21113736771807254, "grad_norm": 0.08758697658777237, "learning_rate": 0.008442892358258012, "loss": 2.5409, "step": 2055 }, { "epoch": 0.21144559745196753, "grad_norm": 0.11635969579219818, "learning_rate": 0.008455217748562038, "loss": 2.5497, "step": 2058 }, { "epoch": 0.21175382718586253, "grad_norm": 0.16910326480865479, "learning_rate": 0.008467543138866064, "loss": 2.5509, "step": 2061 }, { "epoch": 0.21206205691975752, "grad_norm": 0.14605827629566193, "learning_rate": 0.008479868529170091, "loss": 2.5589, "step": 2064 }, { "epoch": 0.2123702866536525, "grad_norm": 0.18890123069286346, "learning_rate": 0.008492193919474117, "loss": 2.5454, "step": 2067 }, { "epoch": 0.2126785163875475, "grad_norm": 0.09277717024087906, "learning_rate": 0.008504519309778144, "loss": 2.4984, "step": 2070 }, { "epoch": 0.21298674612144253, "grad_norm": 0.07268327474594116, "learning_rate": 0.008516844700082168, "loss": 2.5323, "step": 2073 }, { "epoch": 0.21329497585533752, "grad_norm": 0.0807403028011322, "learning_rate": 0.008529170090386196, "loss": 2.5083, "step": 2076 }, { "epoch": 0.21360320558923251, "grad_norm": 0.12681947648525238, "learning_rate": 0.008541495480690222, "loss": 2.5386, "step": 2079 }, { "epoch": 0.2139114353231275, "grad_norm": 0.25378334522247314, "learning_rate": 0.008553820870994249, "loss": 2.5188, "step": 2082 }, { "epoch": 0.2142196650570225, "grad_norm": 0.15101733803749084, "learning_rate": 0.008566146261298275, "loss": 2.5457, "step": 2085 }, { "epoch": 0.2145278947909175, "grad_norm": 0.17336703836917877, "learning_rate": 0.0085784716516023, "loss": 2.5206, "step": 2088 }, { "epoch": 0.2148361245248125, "grad_norm": 0.07735245674848557, "learning_rate": 0.008590797041906328, "loss": 2.5297, "step": 2091 }, { "epoch": 0.21514435425870748, "grad_norm": 0.15841136872768402, "learning_rate": 0.008603122432210354, "loss": 2.5086, "step": 2094 }, { "epoch": 0.21545258399260248, "grad_norm": 0.15941859781742096, "learning_rate": 0.00861544782251438, "loss": 2.5316, "step": 2097 }, { "epoch": 0.21576081372649747, "grad_norm": 0.13837756216526031, "learning_rate": 0.008627773212818405, "loss": 2.4818, "step": 2100 }, { "epoch": 0.2160690434603925, "grad_norm": 0.14743675291538239, "learning_rate": 0.008640098603122433, "loss": 2.5351, "step": 2103 }, { "epoch": 0.21637727319428748, "grad_norm": 0.15961112082004547, "learning_rate": 0.008652423993426459, "loss": 2.4916, "step": 2106 }, { "epoch": 0.21668550292818248, "grad_norm": 0.16091223061084747, "learning_rate": 0.008664749383730484, "loss": 2.5026, "step": 2109 }, { "epoch": 0.21699373266207747, "grad_norm": 0.1695915311574936, "learning_rate": 0.008677074774034512, "loss": 2.4994, "step": 2112 }, { "epoch": 0.21730196239597246, "grad_norm": 0.1457175761461258, "learning_rate": 0.008689400164338537, "loss": 2.5225, "step": 2115 }, { "epoch": 0.21761019212986746, "grad_norm": 0.0995342880487442, "learning_rate": 0.008701725554642563, "loss": 2.5373, "step": 2118 }, { "epoch": 0.21791842186376245, "grad_norm": 0.11527393013238907, "learning_rate": 0.00871405094494659, "loss": 2.5207, "step": 2121 }, { "epoch": 0.21822665159765745, "grad_norm": 0.07951527088880539, "learning_rate": 0.008726376335250616, "loss": 2.4868, "step": 2124 }, { "epoch": 0.21853488133155244, "grad_norm": 0.11319970339536667, "learning_rate": 0.008738701725554644, "loss": 2.4965, "step": 2127 }, { "epoch": 0.21884311106544746, "grad_norm": 0.14932893216609955, "learning_rate": 0.008751027115858668, "loss": 2.5164, "step": 2130 }, { "epoch": 0.21915134079934245, "grad_norm": 0.1703396886587143, "learning_rate": 0.008763352506162695, "loss": 2.5175, "step": 2133 }, { "epoch": 0.21945957053323745, "grad_norm": 0.2208787351846695, "learning_rate": 0.008775677896466721, "loss": 2.521, "step": 2136 }, { "epoch": 0.21976780026713244, "grad_norm": 0.0884699895977974, "learning_rate": 0.008788003286770749, "loss": 2.5356, "step": 2139 }, { "epoch": 0.22007603000102743, "grad_norm": 0.06739311665296555, "learning_rate": 0.008800328677074774, "loss": 2.5102, "step": 2142 }, { "epoch": 0.22038425973492243, "grad_norm": 0.09653139859437943, "learning_rate": 0.0088126540673788, "loss": 2.5047, "step": 2145 }, { "epoch": 0.22069248946881742, "grad_norm": 0.11972832679748535, "learning_rate": 0.008824979457682828, "loss": 2.5086, "step": 2148 }, { "epoch": 0.22100071920271241, "grad_norm": 0.13725396990776062, "learning_rate": 0.008837304847986853, "loss": 2.5034, "step": 2151 }, { "epoch": 0.2213089489366074, "grad_norm": 0.09293966740369797, "learning_rate": 0.008849630238290879, "loss": 2.5004, "step": 2154 }, { "epoch": 0.2216171786705024, "grad_norm": 0.07625159621238708, "learning_rate": 0.008861955628594905, "loss": 2.508, "step": 2157 }, { "epoch": 0.22192540840439742, "grad_norm": 0.08581928163766861, "learning_rate": 0.008874281018898932, "loss": 2.4973, "step": 2160 }, { "epoch": 0.22223363813829242, "grad_norm": 0.12700457870960236, "learning_rate": 0.008886606409202958, "loss": 2.5174, "step": 2163 }, { "epoch": 0.2225418678721874, "grad_norm": 0.17155064642429352, "learning_rate": 0.008898931799506984, "loss": 2.4969, "step": 2166 }, { "epoch": 0.2228500976060824, "grad_norm": 0.13356278836727142, "learning_rate": 0.008911257189811011, "loss": 2.4876, "step": 2169 }, { "epoch": 0.2231583273399774, "grad_norm": 0.07805536687374115, "learning_rate": 0.008923582580115037, "loss": 2.5151, "step": 2172 }, { "epoch": 0.2234665570738724, "grad_norm": 0.10661714524030685, "learning_rate": 0.008935907970419064, "loss": 2.4607, "step": 2175 }, { "epoch": 0.22377478680776738, "grad_norm": 0.15095242857933044, "learning_rate": 0.008948233360723088, "loss": 2.5358, "step": 2178 }, { "epoch": 0.22408301654166238, "grad_norm": 0.11287077516317368, "learning_rate": 0.008960558751027116, "loss": 2.5289, "step": 2181 }, { "epoch": 0.22439124627555737, "grad_norm": 0.16408318281173706, "learning_rate": 0.008972884141331142, "loss": 2.5256, "step": 2184 }, { "epoch": 0.22469947600945236, "grad_norm": 0.1227622851729393, "learning_rate": 0.008985209531635169, "loss": 2.5091, "step": 2187 }, { "epoch": 0.22500770574334739, "grad_norm": 0.06549924612045288, "learning_rate": 0.008997534921939195, "loss": 2.4908, "step": 2190 }, { "epoch": 0.22531593547724238, "grad_norm": 0.09310626983642578, "learning_rate": 0.00900986031224322, "loss": 2.4903, "step": 2193 }, { "epoch": 0.22562416521113737, "grad_norm": 0.12637357413768768, "learning_rate": 0.009022185702547248, "loss": 2.5089, "step": 2196 }, { "epoch": 0.22593239494503237, "grad_norm": 0.1691301167011261, "learning_rate": 0.009034511092851274, "loss": 2.4984, "step": 2199 }, { "epoch": 0.22624062467892736, "grad_norm": 0.18173068761825562, "learning_rate": 0.0090468364831553, "loss": 2.4552, "step": 2202 }, { "epoch": 0.22654885441282235, "grad_norm": 0.19549600780010223, "learning_rate": 0.009059161873459327, "loss": 2.4642, "step": 2205 }, { "epoch": 0.22685708414671735, "grad_norm": 0.09038446098566055, "learning_rate": 0.009071487263763353, "loss": 2.5017, "step": 2208 }, { "epoch": 0.22716531388061234, "grad_norm": 0.07959726452827454, "learning_rate": 0.009083812654067379, "loss": 2.4934, "step": 2211 }, { "epoch": 0.22747354361450733, "grad_norm": 0.07991699874401093, "learning_rate": 0.009096138044371404, "loss": 2.498, "step": 2214 }, { "epoch": 0.22778177334840236, "grad_norm": 0.09022307395935059, "learning_rate": 0.009108463434675432, "loss": 2.4832, "step": 2217 }, { "epoch": 0.22809000308229735, "grad_norm": 0.11399543285369873, "learning_rate": 0.009120788824979457, "loss": 2.4929, "step": 2220 }, { "epoch": 0.22839823281619234, "grad_norm": 0.10349836200475693, "learning_rate": 0.009133114215283485, "loss": 2.4622, "step": 2223 }, { "epoch": 0.22870646255008734, "grad_norm": 0.17096632719039917, "learning_rate": 0.00914543960558751, "loss": 2.5103, "step": 2226 }, { "epoch": 0.22901469228398233, "grad_norm": 0.13803228735923767, "learning_rate": 0.009157764995891536, "loss": 2.5034, "step": 2229 }, { "epoch": 0.22932292201787732, "grad_norm": 0.16332487761974335, "learning_rate": 0.009170090386195564, "loss": 2.5051, "step": 2232 }, { "epoch": 0.22963115175177232, "grad_norm": 0.12147244811058044, "learning_rate": 0.00918241577649959, "loss": 2.4794, "step": 2235 }, { "epoch": 0.2299393814856673, "grad_norm": 0.08943907916545868, "learning_rate": 0.009194741166803615, "loss": 2.5331, "step": 2238 }, { "epoch": 0.2302476112195623, "grad_norm": 0.08069117367267609, "learning_rate": 0.009207066557107641, "loss": 2.4807, "step": 2241 }, { "epoch": 0.2305558409534573, "grad_norm": 0.11125557869672775, "learning_rate": 0.009219391947411669, "loss": 2.4567, "step": 2244 }, { "epoch": 0.23086407068735232, "grad_norm": 0.2825096547603607, "learning_rate": 0.009231717337715694, "loss": 2.5101, "step": 2247 }, { "epoch": 0.2311723004212473, "grad_norm": 0.10534384101629257, "learning_rate": 0.00924404272801972, "loss": 2.5272, "step": 2250 }, { "epoch": 0.2314805301551423, "grad_norm": 0.07159514725208282, "learning_rate": 0.009256368118323748, "loss": 2.4707, "step": 2253 }, { "epoch": 0.2317887598890373, "grad_norm": 0.06435802578926086, "learning_rate": 0.009268693508627773, "loss": 2.4788, "step": 2256 }, { "epoch": 0.2320969896229323, "grad_norm": 0.09402693063020706, "learning_rate": 0.0092810188989318, "loss": 2.4639, "step": 2259 }, { "epoch": 0.2324052193568273, "grad_norm": 0.18836408853530884, "learning_rate": 0.009293344289235825, "loss": 2.4747, "step": 2262 }, { "epoch": 0.23271344909072228, "grad_norm": 0.09705471992492676, "learning_rate": 0.009305669679539852, "loss": 2.5041, "step": 2265 }, { "epoch": 0.23302167882461727, "grad_norm": 0.09185091406106949, "learning_rate": 0.009317995069843878, "loss": 2.4625, "step": 2268 }, { "epoch": 0.23332990855851227, "grad_norm": 0.0848812386393547, "learning_rate": 0.009330320460147905, "loss": 2.4876, "step": 2271 }, { "epoch": 0.2336381382924073, "grad_norm": 0.07989475131034851, "learning_rate": 0.009342645850451931, "loss": 2.4697, "step": 2274 }, { "epoch": 0.23394636802630228, "grad_norm": 0.09660454094409943, "learning_rate": 0.009354971240755957, "loss": 2.4917, "step": 2277 }, { "epoch": 0.23425459776019728, "grad_norm": 0.09550273418426514, "learning_rate": 0.009367296631059984, "loss": 2.4806, "step": 2280 }, { "epoch": 0.23456282749409227, "grad_norm": 0.16650651395320892, "learning_rate": 0.00937962202136401, "loss": 2.4424, "step": 2283 }, { "epoch": 0.23487105722798726, "grad_norm": 0.1455817073583603, "learning_rate": 0.009391947411668036, "loss": 2.4907, "step": 2286 }, { "epoch": 0.23517928696188226, "grad_norm": 0.075865738093853, "learning_rate": 0.009404272801972062, "loss": 2.5004, "step": 2289 }, { "epoch": 0.23548751669577725, "grad_norm": 0.188491553068161, "learning_rate": 0.009416598192276089, "loss": 2.5111, "step": 2292 }, { "epoch": 0.23579574642967224, "grad_norm": 0.07567702233791351, "learning_rate": 0.009428923582580115, "loss": 2.4966, "step": 2295 }, { "epoch": 0.23610397616356724, "grad_norm": 0.0682358667254448, "learning_rate": 0.00944124897288414, "loss": 2.4781, "step": 2298 }, { "epoch": 0.23641220589746223, "grad_norm": 0.173895925283432, "learning_rate": 0.009453574363188168, "loss": 2.4471, "step": 2301 }, { "epoch": 0.23672043563135725, "grad_norm": 0.15088587999343872, "learning_rate": 0.009465899753492194, "loss": 2.4783, "step": 2304 }, { "epoch": 0.23702866536525224, "grad_norm": 0.09947361797094345, "learning_rate": 0.009478225143796221, "loss": 2.4757, "step": 2307 }, { "epoch": 0.23733689509914724, "grad_norm": 0.0709480568766594, "learning_rate": 0.009490550534100247, "loss": 2.4617, "step": 2310 }, { "epoch": 0.23764512483304223, "grad_norm": 0.11335324496030807, "learning_rate": 0.009502875924404273, "loss": 2.4506, "step": 2313 }, { "epoch": 0.23795335456693723, "grad_norm": 0.10329569876194, "learning_rate": 0.0095152013147083, "loss": 2.4444, "step": 2316 }, { "epoch": 0.23826158430083222, "grad_norm": 0.18935157358646393, "learning_rate": 0.009527526705012326, "loss": 2.4739, "step": 2319 }, { "epoch": 0.2385698140347272, "grad_norm": 0.10977230221033096, "learning_rate": 0.009539852095316352, "loss": 2.4849, "step": 2322 }, { "epoch": 0.2388780437686222, "grad_norm": 0.1623351126909256, "learning_rate": 0.009552177485620377, "loss": 2.4856, "step": 2325 }, { "epoch": 0.2391862735025172, "grad_norm": 0.12067209929227829, "learning_rate": 0.009564502875924405, "loss": 2.427, "step": 2328 }, { "epoch": 0.2394945032364122, "grad_norm": 0.12578649818897247, "learning_rate": 0.00957682826622843, "loss": 2.4719, "step": 2331 }, { "epoch": 0.23980273297030721, "grad_norm": 0.09442924708127975, "learning_rate": 0.009589153656532456, "loss": 2.475, "step": 2334 }, { "epoch": 0.2401109627042022, "grad_norm": 0.06693053990602493, "learning_rate": 0.009601479046836484, "loss": 2.4949, "step": 2337 }, { "epoch": 0.2404191924380972, "grad_norm": 0.09371168911457062, "learning_rate": 0.00961380443714051, "loss": 2.4611, "step": 2340 }, { "epoch": 0.2407274221719922, "grad_norm": 0.11009377986192703, "learning_rate": 0.009626129827444537, "loss": 2.4998, "step": 2343 }, { "epoch": 0.2410356519058872, "grad_norm": 0.08789053559303284, "learning_rate": 0.009638455217748561, "loss": 2.4891, "step": 2346 }, { "epoch": 0.24134388163978218, "grad_norm": 0.2513992488384247, "learning_rate": 0.009650780608052589, "loss": 2.4613, "step": 2349 }, { "epoch": 0.24165211137367718, "grad_norm": 0.09223336726427078, "learning_rate": 0.009663105998356614, "loss": 2.4874, "step": 2352 }, { "epoch": 0.24196034110757217, "grad_norm": 0.08941586315631866, "learning_rate": 0.009675431388660642, "loss": 2.4777, "step": 2355 }, { "epoch": 0.24226857084146716, "grad_norm": 0.09664765000343323, "learning_rate": 0.009687756778964668, "loss": 2.4728, "step": 2358 }, { "epoch": 0.24257680057536218, "grad_norm": 0.08079587668180466, "learning_rate": 0.009700082169268693, "loss": 2.4621, "step": 2361 }, { "epoch": 0.24288503030925718, "grad_norm": 0.07663597911596298, "learning_rate": 0.00971240755957272, "loss": 2.487, "step": 2364 }, { "epoch": 0.24319326004315217, "grad_norm": 0.07564109563827515, "learning_rate": 0.009724732949876747, "loss": 2.4123, "step": 2367 }, { "epoch": 0.24350148977704716, "grad_norm": 0.1025756299495697, "learning_rate": 0.009737058340180772, "loss": 2.4669, "step": 2370 }, { "epoch": 0.24380971951094216, "grad_norm": 0.1370251476764679, "learning_rate": 0.009749383730484798, "loss": 2.4664, "step": 2373 }, { "epoch": 0.24411794924483715, "grad_norm": 0.11926325410604477, "learning_rate": 0.009761709120788825, "loss": 2.4483, "step": 2376 }, { "epoch": 0.24442617897873214, "grad_norm": 0.16847510635852814, "learning_rate": 0.009774034511092851, "loss": 2.4421, "step": 2379 }, { "epoch": 0.24473440871262714, "grad_norm": 0.14343461394309998, "learning_rate": 0.009786359901396877, "loss": 2.452, "step": 2382 }, { "epoch": 0.24504263844652213, "grad_norm": 0.0658588707447052, "learning_rate": 0.009798685291700904, "loss": 2.4717, "step": 2385 }, { "epoch": 0.24535086818041713, "grad_norm": 0.09394209086894989, "learning_rate": 0.00981101068200493, "loss": 2.4467, "step": 2388 }, { "epoch": 0.24565909791431215, "grad_norm": 0.0717134177684784, "learning_rate": 0.009823336072308958, "loss": 2.4505, "step": 2391 }, { "epoch": 0.24596732764820714, "grad_norm": 0.07518400996923447, "learning_rate": 0.009835661462612983, "loss": 2.431, "step": 2394 }, { "epoch": 0.24627555738210213, "grad_norm": 0.10242413729429245, "learning_rate": 0.00984798685291701, "loss": 2.451, "step": 2397 }, { "epoch": 0.24658378711599713, "grad_norm": 0.11668457090854645, "learning_rate": 0.009860312243221035, "loss": 2.4574, "step": 2400 }, { "epoch": 0.24689201684989212, "grad_norm": 0.1074887290596962, "learning_rate": 0.009872637633525062, "loss": 2.4688, "step": 2403 }, { "epoch": 0.24720024658378711, "grad_norm": 0.143118217587471, "learning_rate": 0.009884963023829088, "loss": 2.4614, "step": 2406 }, { "epoch": 0.2475084763176821, "grad_norm": 0.08865509182214737, "learning_rate": 0.009897288414133114, "loss": 2.4768, "step": 2409 }, { "epoch": 0.2478167060515771, "grad_norm": 0.10735021531581879, "learning_rate": 0.009909613804437141, "loss": 2.457, "step": 2412 }, { "epoch": 0.2481249357854721, "grad_norm": 0.11766096949577332, "learning_rate": 0.009921939194741167, "loss": 2.4661, "step": 2415 }, { "epoch": 0.2484331655193671, "grad_norm": 0.11476657539606094, "learning_rate": 0.009934264585045193, "loss": 2.4488, "step": 2418 }, { "epoch": 0.2487413952532621, "grad_norm": 0.05828983336687088, "learning_rate": 0.00994658997534922, "loss": 2.4167, "step": 2421 }, { "epoch": 0.2490496249871571, "grad_norm": 0.05311143398284912, "learning_rate": 0.009958915365653246, "loss": 2.451, "step": 2424 }, { "epoch": 0.2493578547210521, "grad_norm": 0.14447921514511108, "learning_rate": 0.009971240755957273, "loss": 2.4448, "step": 2427 }, { "epoch": 0.2496660844549471, "grad_norm": 0.178679421544075, "learning_rate": 0.009983566146261297, "loss": 2.4577, "step": 2430 }, { "epoch": 0.24997431418884208, "grad_norm": 0.18707922101020813, "learning_rate": 0.009995891536565325, "loss": 2.4544, "step": 2433 }, { "epoch": 0.2502825439227371, "grad_norm": 0.11012792587280273, "learning_rate": 0.01, "loss": 2.4636, "step": 2436 }, { "epoch": 0.25059077365663207, "grad_norm": 0.1133418157696724, "learning_rate": 0.01, "loss": 2.4694, "step": 2439 }, { "epoch": 0.25089900339052706, "grad_norm": 0.09263787418603897, "learning_rate": 0.01, "loss": 2.4174, "step": 2442 }, { "epoch": 0.25120723312442206, "grad_norm": 0.07637537270784378, "learning_rate": 0.01, "loss": 2.4546, "step": 2445 }, { "epoch": 0.25151546285831705, "grad_norm": 0.05083318054676056, "learning_rate": 0.01, "loss": 2.4517, "step": 2448 }, { "epoch": 0.25182369259221205, "grad_norm": 0.11429949849843979, "learning_rate": 0.01, "loss": 2.3998, "step": 2451 }, { "epoch": 0.25213192232610704, "grad_norm": 0.0740060955286026, "learning_rate": 0.01, "loss": 2.4572, "step": 2454 }, { "epoch": 0.25244015206000203, "grad_norm": 0.23151956498622894, "learning_rate": 0.01, "loss": 2.4507, "step": 2457 }, { "epoch": 0.252748381793897, "grad_norm": 0.09557089954614639, "learning_rate": 0.01, "loss": 2.438, "step": 2460 }, { "epoch": 0.2530566115277921, "grad_norm": 0.06453042477369308, "learning_rate": 0.01, "loss": 2.4444, "step": 2463 }, { "epoch": 0.25336484126168707, "grad_norm": 0.06805883347988129, "learning_rate": 0.01, "loss": 2.4333, "step": 2466 }, { "epoch": 0.25367307099558206, "grad_norm": 0.12063002586364746, "learning_rate": 0.01, "loss": 2.4349, "step": 2469 }, { "epoch": 0.25398130072947706, "grad_norm": 0.12683679163455963, "learning_rate": 0.01, "loss": 2.4615, "step": 2472 }, { "epoch": 0.25428953046337205, "grad_norm": 0.19388514757156372, "learning_rate": 0.01, "loss": 2.4251, "step": 2475 }, { "epoch": 0.25459776019726704, "grad_norm": 0.15118692815303802, "learning_rate": 0.01, "loss": 2.4493, "step": 2478 }, { "epoch": 0.25490598993116204, "grad_norm": 0.0716528594493866, "learning_rate": 0.01, "loss": 2.4177, "step": 2481 }, { "epoch": 0.25521421966505703, "grad_norm": 0.06410454958677292, "learning_rate": 0.01, "loss": 2.4472, "step": 2484 }, { "epoch": 0.255522449398952, "grad_norm": 0.0613977424800396, "learning_rate": 0.01, "loss": 2.4374, "step": 2487 }, { "epoch": 0.255830679132847, "grad_norm": 0.18522503972053528, "learning_rate": 0.01, "loss": 2.4237, "step": 2490 }, { "epoch": 0.256138908866742, "grad_norm": 0.10789433866739273, "learning_rate": 0.01, "loss": 2.4035, "step": 2493 }, { "epoch": 0.256447138600637, "grad_norm": 0.17734338343143463, "learning_rate": 0.01, "loss": 2.4809, "step": 2496 }, { "epoch": 0.256755368334532, "grad_norm": 0.09952409565448761, "learning_rate": 0.01, "loss": 2.4586, "step": 2499 }, { "epoch": 0.257063598068427, "grad_norm": 0.15578734874725342, "learning_rate": 0.01, "loss": 2.4451, "step": 2502 }, { "epoch": 0.257371827802322, "grad_norm": 0.11684698611497879, "learning_rate": 0.01, "loss": 2.4422, "step": 2505 }, { "epoch": 0.257680057536217, "grad_norm": 0.06539366394281387, "learning_rate": 0.01, "loss": 2.4369, "step": 2508 }, { "epoch": 0.25798828727011197, "grad_norm": 0.15363268554210663, "learning_rate": 0.01, "loss": 2.4307, "step": 2511 }, { "epoch": 0.25829651700400696, "grad_norm": 0.07657501846551895, "learning_rate": 0.01, "loss": 2.4287, "step": 2514 }, { "epoch": 0.25860474673790196, "grad_norm": 0.11238528788089752, "learning_rate": 0.01, "loss": 2.415, "step": 2517 }, { "epoch": 0.25891297647179695, "grad_norm": 0.08362044394016266, "learning_rate": 0.01, "loss": 2.4603, "step": 2520 }, { "epoch": 0.259221206205692, "grad_norm": 0.07373514771461487, "learning_rate": 0.01, "loss": 2.3937, "step": 2523 }, { "epoch": 0.259529435939587, "grad_norm": 0.062842458486557, "learning_rate": 0.01, "loss": 2.4096, "step": 2526 }, { "epoch": 0.259837665673482, "grad_norm": 0.12551096081733704, "learning_rate": 0.01, "loss": 2.4379, "step": 2529 }, { "epoch": 0.260145895407377, "grad_norm": 0.06409156322479248, "learning_rate": 0.01, "loss": 2.4212, "step": 2532 }, { "epoch": 0.260454125141272, "grad_norm": 0.10057753324508667, "learning_rate": 0.01, "loss": 2.4349, "step": 2535 }, { "epoch": 0.26076235487516697, "grad_norm": 0.1575561910867691, "learning_rate": 0.01, "loss": 2.44, "step": 2538 }, { "epoch": 0.26107058460906196, "grad_norm": 0.25684165954589844, "learning_rate": 0.01, "loss": 2.4308, "step": 2541 }, { "epoch": 0.26137881434295696, "grad_norm": 0.07472192496061325, "learning_rate": 0.01, "loss": 2.4065, "step": 2544 }, { "epoch": 0.26168704407685195, "grad_norm": 0.060896482318639755, "learning_rate": 0.01, "loss": 2.4347, "step": 2547 }, { "epoch": 0.26199527381074694, "grad_norm": 0.12883131206035614, "learning_rate": 0.01, "loss": 2.42, "step": 2550 }, { "epoch": 0.26230350354464194, "grad_norm": 0.10772990435361862, "learning_rate": 0.01, "loss": 2.3982, "step": 2553 }, { "epoch": 0.26261173327853693, "grad_norm": 0.20955395698547363, "learning_rate": 0.01, "loss": 2.4204, "step": 2556 }, { "epoch": 0.2629199630124319, "grad_norm": 0.08120223879814148, "learning_rate": 0.01, "loss": 2.4192, "step": 2559 }, { "epoch": 0.2632281927463269, "grad_norm": 0.059099119156599045, "learning_rate": 0.01, "loss": 2.4252, "step": 2562 }, { "epoch": 0.2635364224802219, "grad_norm": 0.08729352802038193, "learning_rate": 0.01, "loss": 2.4227, "step": 2565 }, { "epoch": 0.2638446522141169, "grad_norm": 0.1920178234577179, "learning_rate": 0.01, "loss": 2.4017, "step": 2568 }, { "epoch": 0.2641528819480119, "grad_norm": 0.15997105836868286, "learning_rate": 0.01, "loss": 2.444, "step": 2571 }, { "epoch": 0.2644611116819069, "grad_norm": 0.12249890714883804, "learning_rate": 0.01, "loss": 2.3957, "step": 2574 }, { "epoch": 0.2647693414158019, "grad_norm": 0.05974414199590683, "learning_rate": 0.01, "loss": 2.421, "step": 2577 }, { "epoch": 0.26507757114969693, "grad_norm": 0.13711535930633545, "learning_rate": 0.01, "loss": 2.4234, "step": 2580 }, { "epoch": 0.2653858008835919, "grad_norm": 0.15437988936901093, "learning_rate": 0.01, "loss": 2.4216, "step": 2583 }, { "epoch": 0.2656940306174869, "grad_norm": 0.10766157507896423, "learning_rate": 0.01, "loss": 2.4086, "step": 2586 }, { "epoch": 0.2660022603513819, "grad_norm": 0.0736764669418335, "learning_rate": 0.01, "loss": 2.4227, "step": 2589 }, { "epoch": 0.2663104900852769, "grad_norm": 0.06279190629720688, "learning_rate": 0.01, "loss": 2.4371, "step": 2592 }, { "epoch": 0.2666187198191719, "grad_norm": 0.11150863766670227, "learning_rate": 0.01, "loss": 2.3941, "step": 2595 }, { "epoch": 0.2669269495530669, "grad_norm": 0.1527506411075592, "learning_rate": 0.01, "loss": 2.4287, "step": 2598 }, { "epoch": 0.2672351792869619, "grad_norm": 0.13321219384670258, "learning_rate": 0.01, "loss": 2.3995, "step": 2601 }, { "epoch": 0.2675434090208569, "grad_norm": 0.1157502606511116, "learning_rate": 0.01, "loss": 2.4284, "step": 2604 }, { "epoch": 0.2678516387547519, "grad_norm": 0.10027257353067398, "learning_rate": 0.01, "loss": 2.3877, "step": 2607 }, { "epoch": 0.26815986848864687, "grad_norm": 0.10909545421600342, "learning_rate": 0.01, "loss": 2.4134, "step": 2610 }, { "epoch": 0.26846809822254186, "grad_norm": 0.09810952842235565, "learning_rate": 0.01, "loss": 2.4231, "step": 2613 }, { "epoch": 0.26877632795643686, "grad_norm": 0.06906435638666153, "learning_rate": 0.01, "loss": 2.3989, "step": 2616 }, { "epoch": 0.26908455769033185, "grad_norm": 0.10627961158752441, "learning_rate": 0.01, "loss": 2.4333, "step": 2619 }, { "epoch": 0.26939278742422684, "grad_norm": 0.10462147742509842, "learning_rate": 0.01, "loss": 2.394, "step": 2622 }, { "epoch": 0.26970101715812184, "grad_norm": 0.10885953158140182, "learning_rate": 0.01, "loss": 2.4172, "step": 2625 }, { "epoch": 0.27000924689201683, "grad_norm": 0.0981958881020546, "learning_rate": 0.01, "loss": 2.4112, "step": 2628 }, { "epoch": 0.2703174766259118, "grad_norm": 0.14177650213241577, "learning_rate": 0.01, "loss": 2.3748, "step": 2631 }, { "epoch": 0.2706257063598068, "grad_norm": 0.06374615430831909, "learning_rate": 0.01, "loss": 2.4054, "step": 2634 }, { "epoch": 0.27093393609370187, "grad_norm": 0.23363849520683289, "learning_rate": 0.01, "loss": 2.4194, "step": 2637 }, { "epoch": 0.27124216582759686, "grad_norm": 0.10294153541326523, "learning_rate": 0.01, "loss": 2.384, "step": 2640 }, { "epoch": 0.27155039556149185, "grad_norm": 0.17200984060764313, "learning_rate": 0.01, "loss": 2.4122, "step": 2643 }, { "epoch": 0.27185862529538685, "grad_norm": 0.06513970345258713, "learning_rate": 0.01, "loss": 2.4152, "step": 2646 }, { "epoch": 0.27216685502928184, "grad_norm": 0.08533628284931183, "learning_rate": 0.01, "loss": 2.4508, "step": 2649 }, { "epoch": 0.27247508476317683, "grad_norm": 0.07299966365098953, "learning_rate": 0.01, "loss": 2.4091, "step": 2652 }, { "epoch": 0.2727833144970718, "grad_norm": 0.06617329269647598, "learning_rate": 0.01, "loss": 2.4, "step": 2655 }, { "epoch": 0.2730915442309668, "grad_norm": 0.07062381505966187, "learning_rate": 0.01, "loss": 2.4002, "step": 2658 }, { "epoch": 0.2733997739648618, "grad_norm": 0.11162712424993515, "learning_rate": 0.01, "loss": 2.386, "step": 2661 }, { "epoch": 0.2737080036987568, "grad_norm": 0.07827174663543701, "learning_rate": 0.01, "loss": 2.4111, "step": 2664 }, { "epoch": 0.2740162334326518, "grad_norm": 0.07248109579086304, "learning_rate": 0.01, "loss": 2.3968, "step": 2667 }, { "epoch": 0.2743244631665468, "grad_norm": 0.1251075118780136, "learning_rate": 0.01, "loss": 2.4216, "step": 2670 }, { "epoch": 0.2746326929004418, "grad_norm": 0.1280512660741806, "learning_rate": 0.01, "loss": 2.4233, "step": 2673 }, { "epoch": 0.2749409226343368, "grad_norm": 0.06290891766548157, "learning_rate": 0.01, "loss": 2.412, "step": 2676 }, { "epoch": 0.2752491523682318, "grad_norm": 0.09324091672897339, "learning_rate": 0.01, "loss": 2.4025, "step": 2679 }, { "epoch": 0.27555738210212677, "grad_norm": 0.06253890693187714, "learning_rate": 0.01, "loss": 2.4197, "step": 2682 }, { "epoch": 0.27586561183602176, "grad_norm": 0.10279545187950134, "learning_rate": 0.01, "loss": 2.4099, "step": 2685 }, { "epoch": 0.27617384156991676, "grad_norm": 0.07942310720682144, "learning_rate": 0.01, "loss": 2.4052, "step": 2688 }, { "epoch": 0.27648207130381175, "grad_norm": 0.10373161733150482, "learning_rate": 0.01, "loss": 2.3899, "step": 2691 }, { "epoch": 0.2767903010377068, "grad_norm": 0.312575101852417, "learning_rate": 0.01, "loss": 2.4243, "step": 2694 }, { "epoch": 0.2770985307716018, "grad_norm": 0.07417728751897812, "learning_rate": 0.01, "loss": 2.3604, "step": 2697 }, { "epoch": 0.2774067605054968, "grad_norm": 0.09007294476032257, "learning_rate": 0.01, "loss": 2.3863, "step": 2700 }, { "epoch": 0.2777149902393918, "grad_norm": 0.10452757775783539, "learning_rate": 0.01, "loss": 2.41, "step": 2703 }, { "epoch": 0.2780232199732868, "grad_norm": 0.09276364743709564, "learning_rate": 0.01, "loss": 2.3878, "step": 2706 }, { "epoch": 0.27833144970718177, "grad_norm": 0.08949960023164749, "learning_rate": 0.01, "loss": 2.3823, "step": 2709 }, { "epoch": 0.27863967944107676, "grad_norm": 0.0589129813015461, "learning_rate": 0.01, "loss": 2.401, "step": 2712 }, { "epoch": 0.27894790917497175, "grad_norm": 0.08298425376415253, "learning_rate": 0.01, "loss": 2.4, "step": 2715 }, { "epoch": 0.27925613890886675, "grad_norm": 0.07719019800424576, "learning_rate": 0.01, "loss": 2.3726, "step": 2718 }, { "epoch": 0.27956436864276174, "grad_norm": 0.09369128197431564, "learning_rate": 0.01, "loss": 2.3893, "step": 2721 }, { "epoch": 0.27987259837665673, "grad_norm": 0.11461931467056274, "learning_rate": 0.01, "loss": 2.4017, "step": 2724 }, { "epoch": 0.2801808281105517, "grad_norm": 0.050078991800546646, "learning_rate": 0.01, "loss": 2.3852, "step": 2727 }, { "epoch": 0.2804890578444467, "grad_norm": 0.08188966661691666, "learning_rate": 0.01, "loss": 2.3469, "step": 2730 }, { "epoch": 0.2807972875783417, "grad_norm": 0.0805756077170372, "learning_rate": 0.01, "loss": 2.3632, "step": 2733 }, { "epoch": 0.2811055173122367, "grad_norm": 0.07377249747514725, "learning_rate": 0.01, "loss": 2.3852, "step": 2736 }, { "epoch": 0.2814137470461317, "grad_norm": 0.17040085792541504, "learning_rate": 0.01, "loss": 2.3904, "step": 2739 }, { "epoch": 0.2817219767800267, "grad_norm": 0.1419583261013031, "learning_rate": 0.01, "loss": 2.3735, "step": 2742 }, { "epoch": 0.2820302065139217, "grad_norm": 0.13182134926319122, "learning_rate": 0.01, "loss": 2.3904, "step": 2745 }, { "epoch": 0.2823384362478167, "grad_norm": 0.1058223620057106, "learning_rate": 0.01, "loss": 2.3922, "step": 2748 }, { "epoch": 0.2826466659817117, "grad_norm": 0.08037062734365463, "learning_rate": 0.01, "loss": 2.3692, "step": 2751 }, { "epoch": 0.2829548957156067, "grad_norm": 0.10247037559747696, "learning_rate": 0.01, "loss": 2.3712, "step": 2754 }, { "epoch": 0.2832631254495017, "grad_norm": 0.09925279021263123, "learning_rate": 0.01, "loss": 2.3632, "step": 2757 }, { "epoch": 0.2835713551833967, "grad_norm": 0.05111562833189964, "learning_rate": 0.01, "loss": 2.3622, "step": 2760 }, { "epoch": 0.2838795849172917, "grad_norm": 0.060480840504169464, "learning_rate": 0.01, "loss": 2.3592, "step": 2763 }, { "epoch": 0.2841878146511867, "grad_norm": 0.13488496840000153, "learning_rate": 0.01, "loss": 2.3822, "step": 2766 }, { "epoch": 0.2844960443850817, "grad_norm": 0.08369171619415283, "learning_rate": 0.01, "loss": 2.3922, "step": 2769 }, { "epoch": 0.2848042741189767, "grad_norm": 0.19474861025810242, "learning_rate": 0.01, "loss": 2.387, "step": 2772 }, { "epoch": 0.2851125038528717, "grad_norm": 0.17801512777805328, "learning_rate": 0.01, "loss": 2.3745, "step": 2775 }, { "epoch": 0.2854207335867667, "grad_norm": 0.0658038854598999, "learning_rate": 0.01, "loss": 2.3857, "step": 2778 }, { "epoch": 0.28572896332066167, "grad_norm": 0.0510118305683136, "learning_rate": 0.01, "loss": 2.3735, "step": 2781 }, { "epoch": 0.28603719305455666, "grad_norm": 0.0649714320898056, "learning_rate": 0.01, "loss": 2.4002, "step": 2784 }, { "epoch": 0.28634542278845165, "grad_norm": 0.11462211608886719, "learning_rate": 0.01, "loss": 2.3642, "step": 2787 }, { "epoch": 0.28665365252234665, "grad_norm": 0.0745900496840477, "learning_rate": 0.01, "loss": 2.4058, "step": 2790 }, { "epoch": 0.28696188225624164, "grad_norm": 0.2475040704011917, "learning_rate": 0.01, "loss": 2.3778, "step": 2793 }, { "epoch": 0.28727011199013663, "grad_norm": 0.08792129158973694, "learning_rate": 0.01, "loss": 2.3932, "step": 2796 }, { "epoch": 0.2875783417240316, "grad_norm": 0.04952983185648918, "learning_rate": 0.01, "loss": 2.3631, "step": 2799 }, { "epoch": 0.2878865714579266, "grad_norm": 0.053665559738874435, "learning_rate": 0.01, "loss": 2.3673, "step": 2802 }, { "epoch": 0.2881948011918216, "grad_norm": 0.0579262301325798, "learning_rate": 0.01, "loss": 2.3234, "step": 2805 }, { "epoch": 0.2885030309257166, "grad_norm": 0.13837358355522156, "learning_rate": 0.01, "loss": 2.3854, "step": 2808 }, { "epoch": 0.28881126065961166, "grad_norm": 0.09924750030040741, "learning_rate": 0.01, "loss": 2.3819, "step": 2811 }, { "epoch": 0.28911949039350665, "grad_norm": 0.14742402732372284, "learning_rate": 0.01, "loss": 2.3853, "step": 2814 }, { "epoch": 0.28942772012740164, "grad_norm": 0.11731177568435669, "learning_rate": 0.01, "loss": 2.4082, "step": 2817 }, { "epoch": 0.28973594986129664, "grad_norm": 0.16812686622142792, "learning_rate": 0.01, "loss": 2.3855, "step": 2820 }, { "epoch": 0.29004417959519163, "grad_norm": 0.06864415854215622, "learning_rate": 0.01, "loss": 2.3911, "step": 2823 }, { "epoch": 0.2903524093290866, "grad_norm": 0.050597239285707474, "learning_rate": 0.01, "loss": 2.3627, "step": 2826 }, { "epoch": 0.2906606390629816, "grad_norm": 0.06927742809057236, "learning_rate": 0.01, "loss": 2.3653, "step": 2829 }, { "epoch": 0.2909688687968766, "grad_norm": 0.049216922372579575, "learning_rate": 0.01, "loss": 2.3527, "step": 2832 }, { "epoch": 0.2912770985307716, "grad_norm": 0.06790090352296829, "learning_rate": 0.01, "loss": 2.4087, "step": 2835 }, { "epoch": 0.2915853282646666, "grad_norm": 0.14112398028373718, "learning_rate": 0.01, "loss": 2.3777, "step": 2838 }, { "epoch": 0.2918935579985616, "grad_norm": 0.07459170371294022, "learning_rate": 0.01, "loss": 2.3776, "step": 2841 }, { "epoch": 0.2922017877324566, "grad_norm": 0.05480146035552025, "learning_rate": 0.01, "loss": 2.3831, "step": 2844 }, { "epoch": 0.2925100174663516, "grad_norm": 0.11372058093547821, "learning_rate": 0.01, "loss": 2.3667, "step": 2847 }, { "epoch": 0.2928182472002466, "grad_norm": 0.05589181184768677, "learning_rate": 0.01, "loss": 2.3744, "step": 2850 }, { "epoch": 0.29312647693414157, "grad_norm": 0.10505107790231705, "learning_rate": 0.01, "loss": 2.3461, "step": 2853 }, { "epoch": 0.29343470666803656, "grad_norm": 0.06241190806031227, "learning_rate": 0.01, "loss": 2.3616, "step": 2856 }, { "epoch": 0.29374293640193155, "grad_norm": 0.2687353193759918, "learning_rate": 0.01, "loss": 2.3728, "step": 2859 }, { "epoch": 0.29405116613582655, "grad_norm": 0.13569511473178864, "learning_rate": 0.01, "loss": 2.3758, "step": 2862 }, { "epoch": 0.29435939586972154, "grad_norm": 0.08852502703666687, "learning_rate": 0.01, "loss": 2.3805, "step": 2865 }, { "epoch": 0.2946676256036166, "grad_norm": 0.0690246969461441, "learning_rate": 0.01, "loss": 2.3694, "step": 2868 }, { "epoch": 0.2949758553375116, "grad_norm": 0.13508114218711853, "learning_rate": 0.01, "loss": 2.3626, "step": 2871 }, { "epoch": 0.2952840850714066, "grad_norm": 0.06574945896863937, "learning_rate": 0.01, "loss": 2.3661, "step": 2874 }, { "epoch": 0.29559231480530157, "grad_norm": 0.08492054790258408, "learning_rate": 0.01, "loss": 2.3737, "step": 2877 }, { "epoch": 0.29590054453919656, "grad_norm": 0.11930177360773087, "learning_rate": 0.01, "loss": 2.3684, "step": 2880 }, { "epoch": 0.29620877427309156, "grad_norm": 0.06913982331752777, "learning_rate": 0.01, "loss": 2.3597, "step": 2883 }, { "epoch": 0.29651700400698655, "grad_norm": 0.1508978009223938, "learning_rate": 0.01, "loss": 2.3807, "step": 2886 }, { "epoch": 0.29682523374088154, "grad_norm": 0.059416841715574265, "learning_rate": 0.01, "loss": 2.3672, "step": 2889 }, { "epoch": 0.29713346347477654, "grad_norm": 0.07864934206008911, "learning_rate": 0.01, "loss": 2.3542, "step": 2892 }, { "epoch": 0.29744169320867153, "grad_norm": 0.15172207355499268, "learning_rate": 0.01, "loss": 2.3816, "step": 2895 }, { "epoch": 0.2977499229425665, "grad_norm": 0.08946362882852554, "learning_rate": 0.01, "loss": 2.3854, "step": 2898 }, { "epoch": 0.2980581526764615, "grad_norm": 0.06231836602091789, "learning_rate": 0.01, "loss": 2.3803, "step": 2901 }, { "epoch": 0.2983663824103565, "grad_norm": 0.06673764437437057, "learning_rate": 0.01, "loss": 2.3506, "step": 2904 }, { "epoch": 0.2986746121442515, "grad_norm": 0.11514609307050705, "learning_rate": 0.01, "loss": 2.3345, "step": 2907 }, { "epoch": 0.2989828418781465, "grad_norm": 0.05702753737568855, "learning_rate": 0.01, "loss": 2.353, "step": 2910 }, { "epoch": 0.2992910716120415, "grad_norm": 0.09202984720468521, "learning_rate": 0.01, "loss": 2.3978, "step": 2913 }, { "epoch": 0.2995993013459365, "grad_norm": 0.09088042378425598, "learning_rate": 0.01, "loss": 2.3508, "step": 2916 }, { "epoch": 0.2999075310798315, "grad_norm": 0.09106214344501495, "learning_rate": 0.01, "loss": 2.3695, "step": 2919 }, { "epoch": 0.3002157608137265, "grad_norm": 0.12793834507465363, "learning_rate": 0.01, "loss": 2.3585, "step": 2922 }, { "epoch": 0.30052399054762147, "grad_norm": 0.16437458992004395, "learning_rate": 0.01, "loss": 2.3708, "step": 2925 }, { "epoch": 0.3008322202815165, "grad_norm": 0.10168170928955078, "learning_rate": 0.01, "loss": 2.3839, "step": 2928 }, { "epoch": 0.3011404500154115, "grad_norm": 0.12716282904148102, "learning_rate": 0.01, "loss": 2.3653, "step": 2931 }, { "epoch": 0.3014486797493065, "grad_norm": 0.05094976723194122, "learning_rate": 0.01, "loss": 2.3315, "step": 2934 }, { "epoch": 0.3017569094832015, "grad_norm": 0.11750750988721848, "learning_rate": 0.01, "loss": 2.3544, "step": 2937 }, { "epoch": 0.3020651392170965, "grad_norm": 0.0688977912068367, "learning_rate": 0.01, "loss": 2.3485, "step": 2940 }, { "epoch": 0.3023733689509915, "grad_norm": 0.09537909924983978, "learning_rate": 0.01, "loss": 2.3357, "step": 2943 }, { "epoch": 0.3026815986848865, "grad_norm": 0.15028056502342224, "learning_rate": 0.01, "loss": 2.3029, "step": 2946 }, { "epoch": 0.30298982841878147, "grad_norm": 0.2069140523672104, "learning_rate": 0.01, "loss": 2.3658, "step": 2949 }, { "epoch": 0.30329805815267646, "grad_norm": 0.04774792492389679, "learning_rate": 0.01, "loss": 2.3488, "step": 2952 }, { "epoch": 0.30360628788657146, "grad_norm": 0.04033259302377701, "learning_rate": 0.01, "loss": 2.3536, "step": 2955 }, { "epoch": 0.30391451762046645, "grad_norm": 0.04587483033537865, "learning_rate": 0.01, "loss": 2.3377, "step": 2958 }, { "epoch": 0.30422274735436144, "grad_norm": 0.08392881602048874, "learning_rate": 0.01, "loss": 2.3323, "step": 2961 }, { "epoch": 0.30453097708825644, "grad_norm": 0.16665025055408478, "learning_rate": 0.01, "loss": 2.3763, "step": 2964 }, { "epoch": 0.30483920682215143, "grad_norm": 0.19268077611923218, "learning_rate": 0.01, "loss": 2.3545, "step": 2967 }, { "epoch": 0.3051474365560464, "grad_norm": 0.14428319036960602, "learning_rate": 0.01, "loss": 2.3481, "step": 2970 }, { "epoch": 0.3054556662899414, "grad_norm": 0.08958342671394348, "learning_rate": 0.01, "loss": 2.3704, "step": 2973 }, { "epoch": 0.3057638960238364, "grad_norm": 0.06964152306318283, "learning_rate": 0.01, "loss": 2.3649, "step": 2976 }, { "epoch": 0.3060721257577314, "grad_norm": 0.1336866170167923, "learning_rate": 0.01, "loss": 2.3426, "step": 2979 }, { "epoch": 0.3063803554916264, "grad_norm": 0.06913724541664124, "learning_rate": 0.01, "loss": 2.363, "step": 2982 }, { "epoch": 0.30668858522552145, "grad_norm": 0.0705854743719101, "learning_rate": 0.01, "loss": 2.327, "step": 2985 }, { "epoch": 0.30699681495941644, "grad_norm": 0.06596222519874573, "learning_rate": 0.01, "loss": 2.3669, "step": 2988 }, { "epoch": 0.30730504469331144, "grad_norm": 0.12716993689537048, "learning_rate": 0.01, "loss": 2.3395, "step": 2991 }, { "epoch": 0.30761327442720643, "grad_norm": 0.09933049976825714, "learning_rate": 0.01, "loss": 2.3532, "step": 2994 }, { "epoch": 0.3079215041611014, "grad_norm": 0.19280697405338287, "learning_rate": 0.01, "loss": 2.3513, "step": 2997 }, { "epoch": 0.3082297338949964, "grad_norm": 0.08448618650436401, "learning_rate": 0.01, "loss": 2.3828, "step": 3000 }, { "epoch": 0.3085379636288914, "grad_norm": 0.14882349967956543, "learning_rate": 0.01, "loss": 2.3398, "step": 3003 }, { "epoch": 0.3088461933627864, "grad_norm": 0.08360068500041962, "learning_rate": 0.01, "loss": 2.3414, "step": 3006 }, { "epoch": 0.3091544230966814, "grad_norm": 0.1378074288368225, "learning_rate": 0.01, "loss": 2.3461, "step": 3009 }, { "epoch": 0.3094626528305764, "grad_norm": 0.13160692155361176, "learning_rate": 0.01, "loss": 2.3517, "step": 3012 }, { "epoch": 0.3097708825644714, "grad_norm": 0.0702040046453476, "learning_rate": 0.01, "loss": 2.3524, "step": 3015 }, { "epoch": 0.3100791122983664, "grad_norm": 0.06959223002195358, "learning_rate": 0.01, "loss": 2.3398, "step": 3018 }, { "epoch": 0.31038734203226137, "grad_norm": 0.10830830782651901, "learning_rate": 0.01, "loss": 2.3437, "step": 3021 }, { "epoch": 0.31069557176615636, "grad_norm": 0.09298605471849442, "learning_rate": 0.01, "loss": 2.3473, "step": 3024 }, { "epoch": 0.31100380150005136, "grad_norm": 0.06620427966117859, "learning_rate": 0.01, "loss": 2.3341, "step": 3027 }, { "epoch": 0.31131203123394635, "grad_norm": 0.21722812950611115, "learning_rate": 0.01, "loss": 2.3281, "step": 3030 }, { "epoch": 0.31162026096784134, "grad_norm": 0.1475544422864914, "learning_rate": 0.01, "loss": 2.3383, "step": 3033 }, { "epoch": 0.31192849070173634, "grad_norm": 0.13449987769126892, "learning_rate": 0.01, "loss": 2.314, "step": 3036 }, { "epoch": 0.31223672043563133, "grad_norm": 0.06219559907913208, "learning_rate": 0.01, "loss": 2.3526, "step": 3039 }, { "epoch": 0.3125449501695264, "grad_norm": 0.05337538942694664, "learning_rate": 0.01, "loss": 2.3386, "step": 3042 }, { "epoch": 0.3128531799034214, "grad_norm": 0.11457488685846329, "learning_rate": 0.01, "loss": 2.3261, "step": 3045 }, { "epoch": 0.31316140963731637, "grad_norm": 0.23809069395065308, "learning_rate": 0.01, "loss": 2.3411, "step": 3048 }, { "epoch": 0.31346963937121136, "grad_norm": 0.11100046336650848, "learning_rate": 0.01, "loss": 2.3269, "step": 3051 }, { "epoch": 0.31377786910510636, "grad_norm": 0.05229029804468155, "learning_rate": 0.01, "loss": 2.3339, "step": 3054 }, { "epoch": 0.31408609883900135, "grad_norm": 0.05956039950251579, "learning_rate": 0.01, "loss": 2.3566, "step": 3057 }, { "epoch": 0.31439432857289634, "grad_norm": 0.13084881007671356, "learning_rate": 0.01, "loss": 2.332, "step": 3060 }, { "epoch": 0.31470255830679134, "grad_norm": 0.13889305293560028, "learning_rate": 0.01, "loss": 2.3118, "step": 3063 }, { "epoch": 0.31501078804068633, "grad_norm": 0.10443049669265747, "learning_rate": 0.01, "loss": 2.3246, "step": 3066 }, { "epoch": 0.3153190177745813, "grad_norm": 0.04321267828345299, "learning_rate": 0.01, "loss": 2.3353, "step": 3069 }, { "epoch": 0.3156272475084763, "grad_norm": 0.046873319894075394, "learning_rate": 0.01, "loss": 2.3144, "step": 3072 }, { "epoch": 0.3159354772423713, "grad_norm": 0.06548158824443817, "learning_rate": 0.01, "loss": 2.3285, "step": 3075 }, { "epoch": 0.3162437069762663, "grad_norm": 0.19105824828147888, "learning_rate": 0.01, "loss": 2.349, "step": 3078 }, { "epoch": 0.3165519367101613, "grad_norm": 0.14477142691612244, "learning_rate": 0.01, "loss": 2.3074, "step": 3081 }, { "epoch": 0.3168601664440563, "grad_norm": 0.08536936342716217, "learning_rate": 0.01, "loss": 2.3462, "step": 3084 }, { "epoch": 0.3171683961779513, "grad_norm": 0.0595535933971405, "learning_rate": 0.01, "loss": 2.3522, "step": 3087 }, { "epoch": 0.3174766259118463, "grad_norm": 0.058548733592033386, "learning_rate": 0.01, "loss": 2.3314, "step": 3090 }, { "epoch": 0.31778485564574127, "grad_norm": 0.04651311784982681, "learning_rate": 0.01, "loss": 2.3058, "step": 3093 }, { "epoch": 0.31809308537963626, "grad_norm": 0.0516805462539196, "learning_rate": 0.01, "loss": 2.3243, "step": 3096 }, { "epoch": 0.31840131511353126, "grad_norm": 0.16851970553398132, "learning_rate": 0.01, "loss": 2.3315, "step": 3099 }, { "epoch": 0.3187095448474263, "grad_norm": 0.08350600302219391, "learning_rate": 0.01, "loss": 2.315, "step": 3102 }, { "epoch": 0.3190177745813213, "grad_norm": 0.08899964392185211, "learning_rate": 0.01, "loss": 2.3218, "step": 3105 }, { "epoch": 0.3193260043152163, "grad_norm": 0.2424800843000412, "learning_rate": 0.01, "loss": 2.3207, "step": 3108 }, { "epoch": 0.3196342340491113, "grad_norm": 0.213782399892807, "learning_rate": 0.01, "loss": 2.3728, "step": 3111 }, { "epoch": 0.3199424637830063, "grad_norm": 0.15629780292510986, "learning_rate": 0.01, "loss": 2.3453, "step": 3114 }, { "epoch": 0.3202506935169013, "grad_norm": 0.06920924782752991, "learning_rate": 0.01, "loss": 2.3111, "step": 3117 }, { "epoch": 0.32055892325079627, "grad_norm": 0.04514181613922119, "learning_rate": 0.01, "loss": 2.33, "step": 3120 }, { "epoch": 0.32086715298469126, "grad_norm": 0.05500979721546173, "learning_rate": 0.01, "loss": 2.3078, "step": 3123 }, { "epoch": 0.32117538271858626, "grad_norm": 0.09148071706295013, "learning_rate": 0.01, "loss": 2.3457, "step": 3126 }, { "epoch": 0.32148361245248125, "grad_norm": 0.10582035779953003, "learning_rate": 0.01, "loss": 2.3114, "step": 3129 }, { "epoch": 0.32179184218637624, "grad_norm": 0.1557345986366272, "learning_rate": 0.01, "loss": 2.3334, "step": 3132 }, { "epoch": 0.32210007192027124, "grad_norm": 0.11304829269647598, "learning_rate": 0.01, "loss": 2.2797, "step": 3135 }, { "epoch": 0.32240830165416623, "grad_norm": 0.08236223459243774, "learning_rate": 0.01, "loss": 2.3357, "step": 3138 }, { "epoch": 0.3227165313880612, "grad_norm": 0.09718946367502213, "learning_rate": 0.01, "loss": 2.3096, "step": 3141 }, { "epoch": 0.3230247611219562, "grad_norm": 0.07455772161483765, "learning_rate": 0.01, "loss": 2.3127, "step": 3144 }, { "epoch": 0.3233329908558512, "grad_norm": 0.0556890033185482, "learning_rate": 0.01, "loss": 2.3088, "step": 3147 }, { "epoch": 0.3236412205897462, "grad_norm": 0.07595494389533997, "learning_rate": 0.01, "loss": 2.3159, "step": 3150 }, { "epoch": 0.3239494503236412, "grad_norm": 0.07064896821975708, "learning_rate": 0.01, "loss": 2.3336, "step": 3153 }, { "epoch": 0.3242576800575362, "grad_norm": 0.06646276265382767, "learning_rate": 0.01, "loss": 2.324, "step": 3156 }, { "epoch": 0.32456590979143124, "grad_norm": 0.08837945014238358, "learning_rate": 0.01, "loss": 2.3191, "step": 3159 }, { "epoch": 0.32487413952532623, "grad_norm": 0.13228796422481537, "learning_rate": 0.01, "loss": 2.3231, "step": 3162 }, { "epoch": 0.3251823692592212, "grad_norm": 0.1080455407500267, "learning_rate": 0.01, "loss": 2.3341, "step": 3165 }, { "epoch": 0.3254905989931162, "grad_norm": 0.1073957234621048, "learning_rate": 0.01, "loss": 2.3237, "step": 3168 }, { "epoch": 0.3257988287270112, "grad_norm": 0.12472347915172577, "learning_rate": 0.01, "loss": 2.3315, "step": 3171 }, { "epoch": 0.3261070584609062, "grad_norm": 0.09123571217060089, "learning_rate": 0.01, "loss": 2.3588, "step": 3174 }, { "epoch": 0.3264152881948012, "grad_norm": 0.07830306142568588, "learning_rate": 0.01, "loss": 2.3273, "step": 3177 }, { "epoch": 0.3267235179286962, "grad_norm": 0.11552650481462479, "learning_rate": 0.01, "loss": 2.3407, "step": 3180 }, { "epoch": 0.3270317476625912, "grad_norm": 0.13251489400863647, "learning_rate": 0.01, "loss": 2.3241, "step": 3183 }, { "epoch": 0.3273399773964862, "grad_norm": 0.12775808572769165, "learning_rate": 0.01, "loss": 2.331, "step": 3186 }, { "epoch": 0.3276482071303812, "grad_norm": 0.12069859355688095, "learning_rate": 0.01, "loss": 2.3486, "step": 3189 }, { "epoch": 0.32795643686427617, "grad_norm": 0.059109434485435486, "learning_rate": 0.01, "loss": 2.2969, "step": 3192 }, { "epoch": 0.32826466659817116, "grad_norm": 0.12731850147247314, "learning_rate": 0.01, "loss": 2.3269, "step": 3195 }, { "epoch": 0.32857289633206616, "grad_norm": 0.15247757732868195, "learning_rate": 0.01, "loss": 2.3312, "step": 3198 }, { "epoch": 0.32888112606596115, "grad_norm": 0.128463476896286, "learning_rate": 0.01, "loss": 2.3275, "step": 3201 }, { "epoch": 0.32918935579985614, "grad_norm": 0.09406638145446777, "learning_rate": 0.01, "loss": 2.3205, "step": 3204 }, { "epoch": 0.32949758553375114, "grad_norm": 0.10524141043424606, "learning_rate": 0.01, "loss": 2.3423, "step": 3207 }, { "epoch": 0.32980581526764613, "grad_norm": 0.11357913911342621, "learning_rate": 0.01, "loss": 2.3071, "step": 3210 }, { "epoch": 0.3301140450015411, "grad_norm": 0.06979521363973618, "learning_rate": 0.01, "loss": 2.3319, "step": 3213 }, { "epoch": 0.33042227473543617, "grad_norm": 0.07000034302473068, "learning_rate": 0.01, "loss": 2.3523, "step": 3216 }, { "epoch": 0.33073050446933117, "grad_norm": 0.07495003193616867, "learning_rate": 0.01, "loss": 2.305, "step": 3219 }, { "epoch": 0.33103873420322616, "grad_norm": 0.07131810486316681, "learning_rate": 0.01, "loss": 2.2896, "step": 3222 }, { "epoch": 0.33134696393712115, "grad_norm": 0.051389019936323166, "learning_rate": 0.01, "loss": 2.2974, "step": 3225 }, { "epoch": 0.33165519367101615, "grad_norm": 0.05159701779484749, "learning_rate": 0.01, "loss": 2.3344, "step": 3228 }, { "epoch": 0.33196342340491114, "grad_norm": 0.07632975280284882, "learning_rate": 0.01, "loss": 2.3091, "step": 3231 }, { "epoch": 0.33227165313880613, "grad_norm": 0.08053800463676453, "learning_rate": 0.01, "loss": 2.298, "step": 3234 }, { "epoch": 0.3325798828727011, "grad_norm": 0.1371622234582901, "learning_rate": 0.01, "loss": 2.3095, "step": 3237 }, { "epoch": 0.3328881126065961, "grad_norm": 0.11367069184780121, "learning_rate": 0.01, "loss": 2.3212, "step": 3240 }, { "epoch": 0.3331963423404911, "grad_norm": 0.13252900540828705, "learning_rate": 0.01, "loss": 2.3238, "step": 3243 }, { "epoch": 0.3335045720743861, "grad_norm": 0.15517258644104004, "learning_rate": 0.01, "loss": 2.3263, "step": 3246 }, { "epoch": 0.3338128018082811, "grad_norm": 0.14029370248317719, "learning_rate": 0.01, "loss": 2.3457, "step": 3249 }, { "epoch": 0.3341210315421761, "grad_norm": 0.105759397149086, "learning_rate": 0.01, "loss": 2.3008, "step": 3252 }, { "epoch": 0.3344292612760711, "grad_norm": 0.04762979596853256, "learning_rate": 0.01, "loss": 2.3306, "step": 3255 }, { "epoch": 0.3347374910099661, "grad_norm": 0.12065446376800537, "learning_rate": 0.01, "loss": 2.2904, "step": 3258 }, { "epoch": 0.3350457207438611, "grad_norm": 0.08886688947677612, "learning_rate": 0.01, "loss": 2.3243, "step": 3261 }, { "epoch": 0.33535395047775607, "grad_norm": 0.08021339774131775, "learning_rate": 0.01, "loss": 2.3313, "step": 3264 }, { "epoch": 0.33566218021165106, "grad_norm": 0.04490290582180023, "learning_rate": 0.01, "loss": 2.2888, "step": 3267 }, { "epoch": 0.33597040994554606, "grad_norm": 0.061480812728405, "learning_rate": 0.01, "loss": 2.2898, "step": 3270 }, { "epoch": 0.33627863967944105, "grad_norm": 0.04230419546365738, "learning_rate": 0.01, "loss": 2.3062, "step": 3273 }, { "epoch": 0.3365868694133361, "grad_norm": 0.12344948202371597, "learning_rate": 0.01, "loss": 2.3105, "step": 3276 }, { "epoch": 0.3368950991472311, "grad_norm": 0.13087160885334015, "learning_rate": 0.01, "loss": 2.3388, "step": 3279 }, { "epoch": 0.3372033288811261, "grad_norm": 0.06671308726072311, "learning_rate": 0.01, "loss": 2.3062, "step": 3282 }, { "epoch": 0.3375115586150211, "grad_norm": 0.055828843265771866, "learning_rate": 0.01, "loss": 2.3227, "step": 3285 }, { "epoch": 0.3378197883489161, "grad_norm": 0.07760481536388397, "learning_rate": 0.01, "loss": 2.307, "step": 3288 }, { "epoch": 0.33812801808281107, "grad_norm": 0.08074722439050674, "learning_rate": 0.01, "loss": 2.3363, "step": 3291 }, { "epoch": 0.33843624781670606, "grad_norm": 0.046514566987752914, "learning_rate": 0.01, "loss": 2.3152, "step": 3294 }, { "epoch": 0.33874447755060105, "grad_norm": 0.15358585119247437, "learning_rate": 0.01, "loss": 2.3114, "step": 3297 }, { "epoch": 0.33905270728449605, "grad_norm": 0.09048300981521606, "learning_rate": 0.01, "loss": 2.3218, "step": 3300 }, { "epoch": 0.33936093701839104, "grad_norm": 0.08199465274810791, "learning_rate": 0.01, "loss": 2.3133, "step": 3303 }, { "epoch": 0.33966916675228603, "grad_norm": 0.13738159835338593, "learning_rate": 0.01, "loss": 2.3108, "step": 3306 }, { "epoch": 0.339977396486181, "grad_norm": 0.11493804305791855, "learning_rate": 0.01, "loss": 2.2996, "step": 3309 }, { "epoch": 0.340285626220076, "grad_norm": 0.06872740387916565, "learning_rate": 0.01, "loss": 2.306, "step": 3312 }, { "epoch": 0.340593855953971, "grad_norm": 0.055139992386102676, "learning_rate": 0.01, "loss": 2.3129, "step": 3315 }, { "epoch": 0.340902085687866, "grad_norm": 0.16477546095848083, "learning_rate": 0.01, "loss": 2.3138, "step": 3318 }, { "epoch": 0.341210315421761, "grad_norm": 0.06387230008840561, "learning_rate": 0.01, "loss": 2.3025, "step": 3321 }, { "epoch": 0.341518545155656, "grad_norm": 0.1657593995332718, "learning_rate": 0.01, "loss": 2.3255, "step": 3324 }, { "epoch": 0.341826774889551, "grad_norm": 0.08980764448642731, "learning_rate": 0.01, "loss": 2.3024, "step": 3327 }, { "epoch": 0.342135004623446, "grad_norm": 0.05479981005191803, "learning_rate": 0.01, "loss": 2.2955, "step": 3330 }, { "epoch": 0.34244323435734103, "grad_norm": 0.05986113101243973, "learning_rate": 0.01, "loss": 2.3078, "step": 3333 }, { "epoch": 0.342751464091236, "grad_norm": 0.1339874267578125, "learning_rate": 0.01, "loss": 2.2974, "step": 3336 }, { "epoch": 0.343059693825131, "grad_norm": 0.11250229179859161, "learning_rate": 0.01, "loss": 2.3162, "step": 3339 }, { "epoch": 0.343367923559026, "grad_norm": 0.12179972976446152, "learning_rate": 0.01, "loss": 2.2746, "step": 3342 }, { "epoch": 0.343676153292921, "grad_norm": 0.10306143760681152, "learning_rate": 0.01, "loss": 2.3008, "step": 3345 }, { "epoch": 0.343984383026816, "grad_norm": 0.08372616767883301, "learning_rate": 0.01, "loss": 2.2962, "step": 3348 }, { "epoch": 0.344292612760711, "grad_norm": 0.05286876857280731, "learning_rate": 0.01, "loss": 2.3067, "step": 3351 }, { "epoch": 0.344600842494606, "grad_norm": 0.06248036026954651, "learning_rate": 0.01, "loss": 2.3432, "step": 3354 }, { "epoch": 0.344909072228501, "grad_norm": 0.1287723332643509, "learning_rate": 0.01, "loss": 2.3064, "step": 3357 }, { "epoch": 0.345217301962396, "grad_norm": 0.08843682706356049, "learning_rate": 0.01, "loss": 2.3059, "step": 3360 }, { "epoch": 0.34552553169629097, "grad_norm": 0.07060680538415909, "learning_rate": 0.01, "loss": 2.2627, "step": 3363 }, { "epoch": 0.34583376143018596, "grad_norm": 0.10443838685750961, "learning_rate": 0.01, "loss": 2.2876, "step": 3366 }, { "epoch": 0.34614199116408095, "grad_norm": 0.06748315691947937, "learning_rate": 0.01, "loss": 2.3182, "step": 3369 }, { "epoch": 0.34645022089797595, "grad_norm": 0.06599223613739014, "learning_rate": 0.01, "loss": 2.2997, "step": 3372 }, { "epoch": 0.34675845063187094, "grad_norm": 0.08530016988515854, "learning_rate": 0.01, "loss": 2.2959, "step": 3375 }, { "epoch": 0.34706668036576593, "grad_norm": 0.10694181174039841, "learning_rate": 0.01, "loss": 2.3248, "step": 3378 }, { "epoch": 0.3473749100996609, "grad_norm": 0.06598237156867981, "learning_rate": 0.01, "loss": 2.2837, "step": 3381 }, { "epoch": 0.3476831398335559, "grad_norm": 0.0782204419374466, "learning_rate": 0.01, "loss": 2.2926, "step": 3384 }, { "epoch": 0.3479913695674509, "grad_norm": 0.09585436433553696, "learning_rate": 0.01, "loss": 2.2984, "step": 3387 }, { "epoch": 0.34829959930134596, "grad_norm": 0.061477720737457275, "learning_rate": 0.01, "loss": 2.2693, "step": 3390 }, { "epoch": 0.34860782903524096, "grad_norm": 0.104725681245327, "learning_rate": 0.01, "loss": 2.2887, "step": 3393 }, { "epoch": 0.34891605876913595, "grad_norm": 0.12205322831869125, "learning_rate": 0.01, "loss": 2.3052, "step": 3396 }, { "epoch": 0.34922428850303094, "grad_norm": 0.16279913485050201, "learning_rate": 0.01, "loss": 2.2771, "step": 3399 }, { "epoch": 0.34953251823692594, "grad_norm": 0.059565551578998566, "learning_rate": 0.01, "loss": 2.3027, "step": 3402 }, { "epoch": 0.34984074797082093, "grad_norm": 0.06318376958370209, "learning_rate": 0.01, "loss": 2.3131, "step": 3405 }, { "epoch": 0.3501489777047159, "grad_norm": 0.05476443096995354, "learning_rate": 0.01, "loss": 2.2953, "step": 3408 }, { "epoch": 0.3504572074386109, "grad_norm": 0.07989142090082169, "learning_rate": 0.01, "loss": 2.31, "step": 3411 }, { "epoch": 0.3507654371725059, "grad_norm": 0.15566086769104004, "learning_rate": 0.01, "loss": 2.2839, "step": 3414 }, { "epoch": 0.3510736669064009, "grad_norm": 0.060441337525844574, "learning_rate": 0.01, "loss": 2.2952, "step": 3417 }, { "epoch": 0.3513818966402959, "grad_norm": 0.06277213245630264, "learning_rate": 0.01, "loss": 2.32, "step": 3420 }, { "epoch": 0.3516901263741909, "grad_norm": 0.04959907755255699, "learning_rate": 0.01, "loss": 2.3116, "step": 3423 }, { "epoch": 0.3519983561080859, "grad_norm": 0.06766139715909958, "learning_rate": 0.01, "loss": 2.3201, "step": 3426 }, { "epoch": 0.3523065858419809, "grad_norm": 0.053323931992053986, "learning_rate": 0.01, "loss": 2.3271, "step": 3429 }, { "epoch": 0.3526148155758759, "grad_norm": 0.06396596878767014, "learning_rate": 0.01, "loss": 2.2929, "step": 3432 }, { "epoch": 0.35292304530977087, "grad_norm": 0.07360636442899704, "learning_rate": 0.01, "loss": 2.2918, "step": 3435 }, { "epoch": 0.35323127504366586, "grad_norm": 0.10262563079595566, "learning_rate": 0.01, "loss": 2.2871, "step": 3438 }, { "epoch": 0.35353950477756085, "grad_norm": 0.09783780574798584, "learning_rate": 0.01, "loss": 2.3229, "step": 3441 }, { "epoch": 0.35384773451145585, "grad_norm": 0.08542583137750626, "learning_rate": 0.01, "loss": 2.2887, "step": 3444 }, { "epoch": 0.35415596424535084, "grad_norm": 0.11864805966615677, "learning_rate": 0.01, "loss": 2.2848, "step": 3447 }, { "epoch": 0.3544641939792459, "grad_norm": 0.10997387021780014, "learning_rate": 0.01, "loss": 2.2897, "step": 3450 }, { "epoch": 0.3547724237131409, "grad_norm": 0.10915081202983856, "learning_rate": 0.01, "loss": 2.3114, "step": 3453 }, { "epoch": 0.3550806534470359, "grad_norm": 0.15109725296497345, "learning_rate": 0.01, "loss": 2.2933, "step": 3456 }, { "epoch": 0.35538888318093087, "grad_norm": 0.04911811649799347, "learning_rate": 0.01, "loss": 2.3035, "step": 3459 }, { "epoch": 0.35569711291482586, "grad_norm": 0.12352598458528519, "learning_rate": 0.01, "loss": 2.2897, "step": 3462 }, { "epoch": 0.35600534264872086, "grad_norm": 0.10834213346242905, "learning_rate": 0.01, "loss": 2.2879, "step": 3465 }, { "epoch": 0.35631357238261585, "grad_norm": 0.10665787756443024, "learning_rate": 0.01, "loss": 2.2614, "step": 3468 }, { "epoch": 0.35662180211651084, "grad_norm": 0.0898185670375824, "learning_rate": 0.01, "loss": 2.2943, "step": 3471 }, { "epoch": 0.35693003185040584, "grad_norm": 0.07015782594680786, "learning_rate": 0.01, "loss": 2.298, "step": 3474 }, { "epoch": 0.35723826158430083, "grad_norm": 0.1292288452386856, "learning_rate": 0.01, "loss": 2.3122, "step": 3477 }, { "epoch": 0.3575464913181958, "grad_norm": 0.09300121665000916, "learning_rate": 0.01, "loss": 2.2769, "step": 3480 }, { "epoch": 0.3578547210520908, "grad_norm": 0.0449809767305851, "learning_rate": 0.01, "loss": 2.2564, "step": 3483 }, { "epoch": 0.3581629507859858, "grad_norm": 0.051362160593271255, "learning_rate": 0.01, "loss": 2.2739, "step": 3486 }, { "epoch": 0.3584711805198808, "grad_norm": 0.12473469972610474, "learning_rate": 0.01, "loss": 2.2844, "step": 3489 }, { "epoch": 0.3587794102537758, "grad_norm": 0.0925057902932167, "learning_rate": 0.01, "loss": 2.2618, "step": 3492 }, { "epoch": 0.3590876399876708, "grad_norm": 0.1026608943939209, "learning_rate": 0.01, "loss": 2.2814, "step": 3495 }, { "epoch": 0.3593958697215658, "grad_norm": 0.0995681881904602, "learning_rate": 0.01, "loss": 2.2861, "step": 3498 }, { "epoch": 0.3597040994554608, "grad_norm": 0.06513385474681854, "learning_rate": 0.01, "loss": 2.2827, "step": 3501 }, { "epoch": 0.3600123291893558, "grad_norm": 0.06724824756383896, "learning_rate": 0.01, "loss": 2.2799, "step": 3504 }, { "epoch": 0.3603205589232508, "grad_norm": 0.06367610394954681, "learning_rate": 0.01, "loss": 2.2846, "step": 3507 }, { "epoch": 0.3606287886571458, "grad_norm": 0.07489916682243347, "learning_rate": 0.01, "loss": 2.2816, "step": 3510 }, { "epoch": 0.3609370183910408, "grad_norm": 0.11221667379140854, "learning_rate": 0.01, "loss": 2.2869, "step": 3513 }, { "epoch": 0.3612452481249358, "grad_norm": 0.09854032099246979, "learning_rate": 0.01, "loss": 2.2646, "step": 3516 }, { "epoch": 0.3615534778588308, "grad_norm": 0.09218656271696091, "learning_rate": 0.01, "loss": 2.2844, "step": 3519 }, { "epoch": 0.3618617075927258, "grad_norm": 0.1531379073858261, "learning_rate": 0.01, "loss": 2.279, "step": 3522 }, { "epoch": 0.3621699373266208, "grad_norm": 0.07070820778608322, "learning_rate": 0.01, "loss": 2.2747, "step": 3525 }, { "epoch": 0.3624781670605158, "grad_norm": 0.1057102233171463, "learning_rate": 0.01, "loss": 2.275, "step": 3528 }, { "epoch": 0.36278639679441077, "grad_norm": 0.049471016973257065, "learning_rate": 0.01, "loss": 2.3013, "step": 3531 }, { "epoch": 0.36309462652830576, "grad_norm": 0.08196526020765305, "learning_rate": 0.01, "loss": 2.2571, "step": 3534 }, { "epoch": 0.36340285626220076, "grad_norm": 0.09507983922958374, "learning_rate": 0.01, "loss": 2.3196, "step": 3537 }, { "epoch": 0.36371108599609575, "grad_norm": 0.089228555560112, "learning_rate": 0.01, "loss": 2.2539, "step": 3540 }, { "epoch": 0.36401931572999074, "grad_norm": 0.0866270586848259, "learning_rate": 0.01, "loss": 2.284, "step": 3543 }, { "epoch": 0.36432754546388574, "grad_norm": 0.13805072009563446, "learning_rate": 0.01, "loss": 2.2723, "step": 3546 }, { "epoch": 0.36463577519778073, "grad_norm": 0.09308724105358124, "learning_rate": 0.01, "loss": 2.2969, "step": 3549 }, { "epoch": 0.3649440049316757, "grad_norm": 0.07004178315401077, "learning_rate": 0.01, "loss": 2.2959, "step": 3552 }, { "epoch": 0.3652522346655707, "grad_norm": 0.09345975518226624, "learning_rate": 0.01, "loss": 2.2656, "step": 3555 }, { "epoch": 0.3655604643994657, "grad_norm": 0.07694482058286667, "learning_rate": 0.01, "loss": 2.2921, "step": 3558 }, { "epoch": 0.3658686941333607, "grad_norm": 0.05591150000691414, "learning_rate": 0.01, "loss": 2.2869, "step": 3561 }, { "epoch": 0.36617692386725575, "grad_norm": 0.06863993406295776, "learning_rate": 0.01, "loss": 2.2897, "step": 3564 }, { "epoch": 0.36648515360115075, "grad_norm": 0.06258527934551239, "learning_rate": 0.01, "loss": 2.2994, "step": 3567 }, { "epoch": 0.36679338333504574, "grad_norm": 0.1049329936504364, "learning_rate": 0.01, "loss": 2.2794, "step": 3570 }, { "epoch": 0.36710161306894074, "grad_norm": 0.1229025200009346, "learning_rate": 0.01, "loss": 2.2949, "step": 3573 }, { "epoch": 0.36740984280283573, "grad_norm": 0.13274389505386353, "learning_rate": 0.01, "loss": 2.2791, "step": 3576 }, { "epoch": 0.3677180725367307, "grad_norm": 0.09388844668865204, "learning_rate": 0.01, "loss": 2.3067, "step": 3579 }, { "epoch": 0.3680263022706257, "grad_norm": 0.05375714227557182, "learning_rate": 0.01, "loss": 2.2946, "step": 3582 }, { "epoch": 0.3683345320045207, "grad_norm": 0.059105634689331055, "learning_rate": 0.01, "loss": 2.2821, "step": 3585 }, { "epoch": 0.3686427617384157, "grad_norm": 0.055578552186489105, "learning_rate": 0.01, "loss": 2.2694, "step": 3588 }, { "epoch": 0.3689509914723107, "grad_norm": 0.08778764307498932, "learning_rate": 0.01, "loss": 2.2712, "step": 3591 }, { "epoch": 0.3692592212062057, "grad_norm": 0.1044803187251091, "learning_rate": 0.01, "loss": 2.2797, "step": 3594 }, { "epoch": 0.3695674509401007, "grad_norm": 0.15398399531841278, "learning_rate": 0.01, "loss": 2.3042, "step": 3597 }, { "epoch": 0.3698756806739957, "grad_norm": 0.11562564969062805, "learning_rate": 0.01, "loss": 2.2609, "step": 3600 }, { "epoch": 0.37018391040789067, "grad_norm": 0.060630831867456436, "learning_rate": 0.01, "loss": 2.2663, "step": 3603 }, { "epoch": 0.37049214014178566, "grad_norm": 0.0576477013528347, "learning_rate": 0.01, "loss": 2.2974, "step": 3606 }, { "epoch": 0.37080036987568066, "grad_norm": 0.059915438294410706, "learning_rate": 0.01, "loss": 2.3031, "step": 3609 }, { "epoch": 0.37110859960957565, "grad_norm": 0.10807155817747116, "learning_rate": 0.01, "loss": 2.2739, "step": 3612 }, { "epoch": 0.37141682934347064, "grad_norm": 0.09196165949106216, "learning_rate": 0.01, "loss": 2.3054, "step": 3615 }, { "epoch": 0.37172505907736564, "grad_norm": 0.07379795610904694, "learning_rate": 0.01, "loss": 2.2805, "step": 3618 }, { "epoch": 0.3720332888112607, "grad_norm": 0.06034912168979645, "learning_rate": 0.01, "loss": 2.2549, "step": 3621 }, { "epoch": 0.3723415185451557, "grad_norm": 0.13983361423015594, "learning_rate": 0.01, "loss": 2.269, "step": 3624 }, { "epoch": 0.3726497482790507, "grad_norm": 0.11592069268226624, "learning_rate": 0.01, "loss": 2.2903, "step": 3627 }, { "epoch": 0.37295797801294567, "grad_norm": 0.15428505837917328, "learning_rate": 0.01, "loss": 2.2918, "step": 3630 }, { "epoch": 0.37326620774684066, "grad_norm": 0.19936774671077728, "learning_rate": 0.01, "loss": 2.2782, "step": 3633 }, { "epoch": 0.37357443748073565, "grad_norm": 0.15364627540111542, "learning_rate": 0.01, "loss": 2.2736, "step": 3636 }, { "epoch": 0.37388266721463065, "grad_norm": 0.047554273158311844, "learning_rate": 0.01, "loss": 2.3172, "step": 3639 }, { "epoch": 0.37419089694852564, "grad_norm": 0.0555570051074028, "learning_rate": 0.01, "loss": 2.2731, "step": 3642 }, { "epoch": 0.37449912668242064, "grad_norm": 0.052204012870788574, "learning_rate": 0.01, "loss": 2.281, "step": 3645 }, { "epoch": 0.37480735641631563, "grad_norm": 0.09206510335206985, "learning_rate": 0.01, "loss": 2.2639, "step": 3648 }, { "epoch": 0.3751155861502106, "grad_norm": 0.1199311912059784, "learning_rate": 0.01, "loss": 2.2873, "step": 3651 }, { "epoch": 0.3754238158841056, "grad_norm": 0.08949270099401474, "learning_rate": 0.01, "loss": 2.2668, "step": 3654 }, { "epoch": 0.3757320456180006, "grad_norm": 0.08521883934736252, "learning_rate": 0.01, "loss": 2.247, "step": 3657 }, { "epoch": 0.3760402753518956, "grad_norm": 0.07689694315195084, "learning_rate": 0.01, "loss": 2.2904, "step": 3660 }, { "epoch": 0.3763485050857906, "grad_norm": 0.08761987835168839, "learning_rate": 0.01, "loss": 2.2761, "step": 3663 }, { "epoch": 0.3766567348196856, "grad_norm": 0.056420013308525085, "learning_rate": 0.01, "loss": 2.259, "step": 3666 }, { "epoch": 0.3769649645535806, "grad_norm": 0.06192856654524803, "learning_rate": 0.01, "loss": 2.2294, "step": 3669 }, { "epoch": 0.3772731942874756, "grad_norm": 0.1021333709359169, "learning_rate": 0.01, "loss": 2.2649, "step": 3672 }, { "epoch": 0.37758142402137057, "grad_norm": 0.10071670264005661, "learning_rate": 0.01, "loss": 2.2584, "step": 3675 }, { "epoch": 0.37788965375526556, "grad_norm": 0.05968625843524933, "learning_rate": 0.01, "loss": 2.2699, "step": 3678 }, { "epoch": 0.3781978834891606, "grad_norm": 0.07489661872386932, "learning_rate": 0.01, "loss": 2.2663, "step": 3681 }, { "epoch": 0.3785061132230556, "grad_norm": 0.07880943268537521, "learning_rate": 0.01, "loss": 2.2709, "step": 3684 }, { "epoch": 0.3788143429569506, "grad_norm": 0.055632054805755615, "learning_rate": 0.01, "loss": 2.272, "step": 3687 }, { "epoch": 0.3791225726908456, "grad_norm": 0.05365302786231041, "learning_rate": 0.01, "loss": 2.2268, "step": 3690 }, { "epoch": 0.3794308024247406, "grad_norm": 0.0802481397986412, "learning_rate": 0.01, "loss": 2.2631, "step": 3693 }, { "epoch": 0.3797390321586356, "grad_norm": 0.1312764585018158, "learning_rate": 0.01, "loss": 2.2985, "step": 3696 }, { "epoch": 0.3800472618925306, "grad_norm": 0.14543971419334412, "learning_rate": 0.01, "loss": 2.25, "step": 3699 }, { "epoch": 0.38035549162642557, "grad_norm": 0.05727002024650574, "learning_rate": 0.01, "loss": 2.2556, "step": 3702 }, { "epoch": 0.38066372136032056, "grad_norm": 0.07309607416391373, "learning_rate": 0.01, "loss": 2.2574, "step": 3705 }, { "epoch": 0.38097195109421556, "grad_norm": 0.03849095106124878, "learning_rate": 0.01, "loss": 2.2501, "step": 3708 }, { "epoch": 0.38128018082811055, "grad_norm": 0.0623021237552166, "learning_rate": 0.01, "loss": 2.2672, "step": 3711 }, { "epoch": 0.38158841056200554, "grad_norm": 0.08916610479354858, "learning_rate": 0.01, "loss": 2.2683, "step": 3714 }, { "epoch": 0.38189664029590054, "grad_norm": 0.08126388490200043, "learning_rate": 0.01, "loss": 2.2574, "step": 3717 }, { "epoch": 0.38220487002979553, "grad_norm": 0.07121114432811737, "learning_rate": 0.01, "loss": 2.2358, "step": 3720 }, { "epoch": 0.3825130997636905, "grad_norm": 0.07406505942344666, "learning_rate": 0.01, "loss": 2.2736, "step": 3723 }, { "epoch": 0.3828213294975855, "grad_norm": 0.13355331122875214, "learning_rate": 0.01, "loss": 2.2685, "step": 3726 }, { "epoch": 0.3831295592314805, "grad_norm": 0.05672430619597435, "learning_rate": 0.01, "loss": 2.2913, "step": 3729 }, { "epoch": 0.3834377889653755, "grad_norm": 0.047647468745708466, "learning_rate": 0.01, "loss": 2.2533, "step": 3732 }, { "epoch": 0.3837460186992705, "grad_norm": 0.059008341282606125, "learning_rate": 0.01, "loss": 2.2867, "step": 3735 }, { "epoch": 0.38405424843316555, "grad_norm": 0.06551840156316757, "learning_rate": 0.01, "loss": 2.2742, "step": 3738 }, { "epoch": 0.38436247816706054, "grad_norm": 0.08781883865594864, "learning_rate": 0.01, "loss": 2.2427, "step": 3741 }, { "epoch": 0.38467070790095553, "grad_norm": 0.06808102875947952, "learning_rate": 0.01, "loss": 2.2493, "step": 3744 }, { "epoch": 0.3849789376348505, "grad_norm": 0.06570697575807571, "learning_rate": 0.01, "loss": 2.2445, "step": 3747 }, { "epoch": 0.3852871673687455, "grad_norm": 0.08742080628871918, "learning_rate": 0.01, "loss": 2.2576, "step": 3750 }, { "epoch": 0.3855953971026405, "grad_norm": 0.1518019735813141, "learning_rate": 0.01, "loss": 2.2819, "step": 3753 }, { "epoch": 0.3859036268365355, "grad_norm": 0.10349754244089127, "learning_rate": 0.01, "loss": 2.2465, "step": 3756 }, { "epoch": 0.3862118565704305, "grad_norm": 0.06008581072092056, "learning_rate": 0.01, "loss": 2.2817, "step": 3759 }, { "epoch": 0.3865200863043255, "grad_norm": 0.0450257770717144, "learning_rate": 0.01, "loss": 2.2585, "step": 3762 }, { "epoch": 0.3868283160382205, "grad_norm": 0.04145176708698273, "learning_rate": 0.01, "loss": 2.2634, "step": 3765 }, { "epoch": 0.3871365457721155, "grad_norm": 0.17084141075611115, "learning_rate": 0.01, "loss": 2.2355, "step": 3768 }, { "epoch": 0.3874447755060105, "grad_norm": 0.06679602712392807, "learning_rate": 0.01, "loss": 2.2737, "step": 3771 }, { "epoch": 0.38775300523990547, "grad_norm": 0.05363382026553154, "learning_rate": 0.01, "loss": 2.244, "step": 3774 }, { "epoch": 0.38806123497380046, "grad_norm": 0.05722133815288544, "learning_rate": 0.01, "loss": 2.2515, "step": 3777 }, { "epoch": 0.38836946470769546, "grad_norm": 0.06288215517997742, "learning_rate": 0.01, "loss": 2.2625, "step": 3780 }, { "epoch": 0.38867769444159045, "grad_norm": 0.05087801814079285, "learning_rate": 0.01, "loss": 2.2883, "step": 3783 }, { "epoch": 0.38898592417548544, "grad_norm": 0.08160998672246933, "learning_rate": 0.01, "loss": 2.2462, "step": 3786 }, { "epoch": 0.38929415390938044, "grad_norm": 0.22291240096092224, "learning_rate": 0.01, "loss": 2.2613, "step": 3789 }, { "epoch": 0.38960238364327543, "grad_norm": 0.11482773721218109, "learning_rate": 0.01, "loss": 2.2633, "step": 3792 }, { "epoch": 0.3899106133771705, "grad_norm": 0.056299589574337006, "learning_rate": 0.01, "loss": 2.2896, "step": 3795 }, { "epoch": 0.39021884311106547, "grad_norm": 0.04524017125368118, "learning_rate": 0.01, "loss": 2.2543, "step": 3798 }, { "epoch": 0.39052707284496047, "grad_norm": 0.0903107225894928, "learning_rate": 0.01, "loss": 2.2801, "step": 3801 }, { "epoch": 0.39083530257885546, "grad_norm": 0.0645504966378212, "learning_rate": 0.01, "loss": 2.2628, "step": 3804 }, { "epoch": 0.39114353231275045, "grad_norm": 0.06752094626426697, "learning_rate": 0.01, "loss": 2.2732, "step": 3807 }, { "epoch": 0.39145176204664545, "grad_norm": 0.04459339380264282, "learning_rate": 0.01, "loss": 2.2601, "step": 3810 }, { "epoch": 0.39175999178054044, "grad_norm": 0.07300913333892822, "learning_rate": 0.01, "loss": 2.2437, "step": 3813 }, { "epoch": 0.39206822151443543, "grad_norm": 0.16804097592830658, "learning_rate": 0.01, "loss": 2.26, "step": 3816 }, { "epoch": 0.3923764512483304, "grad_norm": 0.10682248324155807, "learning_rate": 0.01, "loss": 2.2764, "step": 3819 }, { "epoch": 0.3926846809822254, "grad_norm": 0.046895258128643036, "learning_rate": 0.01, "loss": 2.2654, "step": 3822 }, { "epoch": 0.3929929107161204, "grad_norm": 0.05799179524183273, "learning_rate": 0.01, "loss": 2.2254, "step": 3825 }, { "epoch": 0.3933011404500154, "grad_norm": 0.0474528968334198, "learning_rate": 0.01, "loss": 2.2604, "step": 3828 }, { "epoch": 0.3936093701839104, "grad_norm": 0.1437537968158722, "learning_rate": 0.01, "loss": 2.2532, "step": 3831 }, { "epoch": 0.3939175999178054, "grad_norm": 0.06202014535665512, "learning_rate": 0.01, "loss": 2.2486, "step": 3834 }, { "epoch": 0.3942258296517004, "grad_norm": 0.09379147738218307, "learning_rate": 0.01, "loss": 2.2602, "step": 3837 }, { "epoch": 0.3945340593855954, "grad_norm": 0.07898830622434616, "learning_rate": 0.01, "loss": 2.2605, "step": 3840 }, { "epoch": 0.3948422891194904, "grad_norm": 0.10186600685119629, "learning_rate": 0.01, "loss": 2.2807, "step": 3843 }, { "epoch": 0.39515051885338537, "grad_norm": 0.08611535280942917, "learning_rate": 0.01, "loss": 2.2571, "step": 3846 }, { "epoch": 0.39545874858728036, "grad_norm": 0.10435480624437332, "learning_rate": 0.01, "loss": 2.2721, "step": 3849 }, { "epoch": 0.39576697832117536, "grad_norm": 0.11543019860982895, "learning_rate": 0.01, "loss": 2.2598, "step": 3852 }, { "epoch": 0.3960752080550704, "grad_norm": 0.11996404081583023, "learning_rate": 0.01, "loss": 2.2536, "step": 3855 }, { "epoch": 0.3963834377889654, "grad_norm": 0.05615765228867531, "learning_rate": 0.01, "loss": 2.2637, "step": 3858 }, { "epoch": 0.3966916675228604, "grad_norm": 0.06568838655948639, "learning_rate": 0.01, "loss": 2.2756, "step": 3861 }, { "epoch": 0.3969998972567554, "grad_norm": 0.07747132331132889, "learning_rate": 0.01, "loss": 2.2816, "step": 3864 }, { "epoch": 0.3973081269906504, "grad_norm": 0.057373497635126114, "learning_rate": 0.01, "loss": 2.255, "step": 3867 }, { "epoch": 0.3976163567245454, "grad_norm": 0.11501277983188629, "learning_rate": 0.01, "loss": 2.2494, "step": 3870 }, { "epoch": 0.39792458645844037, "grad_norm": 0.07761958241462708, "learning_rate": 0.01, "loss": 2.2459, "step": 3873 }, { "epoch": 0.39823281619233536, "grad_norm": 0.06263428926467896, "learning_rate": 0.01, "loss": 2.2649, "step": 3876 }, { "epoch": 0.39854104592623035, "grad_norm": 0.04552373290061951, "learning_rate": 0.01, "loss": 2.2578, "step": 3879 }, { "epoch": 0.39884927566012535, "grad_norm": 0.0631655901670456, "learning_rate": 0.01, "loss": 2.2648, "step": 3882 }, { "epoch": 0.39915750539402034, "grad_norm": 0.06519417464733124, "learning_rate": 0.01, "loss": 2.2438, "step": 3885 }, { "epoch": 0.39946573512791533, "grad_norm": 0.10446424037218094, "learning_rate": 0.01, "loss": 2.2815, "step": 3888 }, { "epoch": 0.3997739648618103, "grad_norm": 0.07533372938632965, "learning_rate": 0.01, "loss": 2.272, "step": 3891 }, { "epoch": 0.4000821945957053, "grad_norm": 0.05748215690255165, "learning_rate": 0.01, "loss": 2.2971, "step": 3894 }, { "epoch": 0.4003904243296003, "grad_norm": 0.051343973726034164, "learning_rate": 0.01, "loss": 2.2316, "step": 3897 }, { "epoch": 0.4006986540634953, "grad_norm": 0.04799075797200203, "learning_rate": 0.01, "loss": 2.2333, "step": 3900 }, { "epoch": 0.4010068837973903, "grad_norm": 0.12885436415672302, "learning_rate": 0.01, "loss": 2.247, "step": 3903 }, { "epoch": 0.4013151135312853, "grad_norm": 0.07175249606370926, "learning_rate": 0.01, "loss": 2.2407, "step": 3906 }, { "epoch": 0.4016233432651803, "grad_norm": 0.10784266144037247, "learning_rate": 0.01, "loss": 2.2458, "step": 3909 }, { "epoch": 0.40193157299907534, "grad_norm": 0.08646712452173233, "learning_rate": 0.01, "loss": 2.2571, "step": 3912 }, { "epoch": 0.40223980273297033, "grad_norm": 0.05365338176488876, "learning_rate": 0.01, "loss": 2.2585, "step": 3915 }, { "epoch": 0.4025480324668653, "grad_norm": 0.07037780433893204, "learning_rate": 0.01, "loss": 2.2277, "step": 3918 }, { "epoch": 0.4028562622007603, "grad_norm": 0.040290024131536484, "learning_rate": 0.01, "loss": 2.2508, "step": 3921 }, { "epoch": 0.4031644919346553, "grad_norm": 0.050338853150606155, "learning_rate": 0.01, "loss": 2.2356, "step": 3924 }, { "epoch": 0.4034727216685503, "grad_norm": 0.1420246660709381, "learning_rate": 0.01, "loss": 2.2531, "step": 3927 }, { "epoch": 0.4037809514024453, "grad_norm": 0.07432923465967178, "learning_rate": 0.01, "loss": 2.2766, "step": 3930 }, { "epoch": 0.4040891811363403, "grad_norm": 0.04954257979989052, "learning_rate": 0.01, "loss": 2.2825, "step": 3933 }, { "epoch": 0.4043974108702353, "grad_norm": 0.05988876149058342, "learning_rate": 0.01, "loss": 2.2342, "step": 3936 }, { "epoch": 0.4047056406041303, "grad_norm": 0.09800540655851364, "learning_rate": 0.01, "loss": 2.2268, "step": 3939 }, { "epoch": 0.4050138703380253, "grad_norm": 0.09171874821186066, "learning_rate": 0.01, "loss": 2.2648, "step": 3942 }, { "epoch": 0.40532210007192027, "grad_norm": 0.07430606335401535, "learning_rate": 0.01, "loss": 2.2523, "step": 3945 }, { "epoch": 0.40563032980581526, "grad_norm": 0.043649185448884964, "learning_rate": 0.01, "loss": 2.2303, "step": 3948 }, { "epoch": 0.40593855953971025, "grad_norm": 0.04120480641722679, "learning_rate": 0.01, "loss": 2.2299, "step": 3951 }, { "epoch": 0.40624678927360525, "grad_norm": 0.0692945346236229, "learning_rate": 0.01, "loss": 2.2466, "step": 3954 }, { "epoch": 0.40655501900750024, "grad_norm": 0.08884318917989731, "learning_rate": 0.01, "loss": 2.2802, "step": 3957 }, { "epoch": 0.40686324874139523, "grad_norm": 0.05542384088039398, "learning_rate": 0.01, "loss": 2.2303, "step": 3960 }, { "epoch": 0.4071714784752902, "grad_norm": 0.08013599365949631, "learning_rate": 0.01, "loss": 2.2361, "step": 3963 }, { "epoch": 0.4074797082091852, "grad_norm": 0.15963242948055267, "learning_rate": 0.01, "loss": 2.2608, "step": 3966 }, { "epoch": 0.40778793794308027, "grad_norm": 0.05428241938352585, "learning_rate": 0.01, "loss": 2.2415, "step": 3969 }, { "epoch": 0.40809616767697526, "grad_norm": 0.09297880530357361, "learning_rate": 0.01, "loss": 2.2804, "step": 3972 }, { "epoch": 0.40840439741087026, "grad_norm": 0.11259882897138596, "learning_rate": 0.01, "loss": 2.2562, "step": 3975 }, { "epoch": 0.40871262714476525, "grad_norm": 0.0546397790312767, "learning_rate": 0.01, "loss": 2.2423, "step": 3978 }, { "epoch": 0.40902085687866024, "grad_norm": 0.13870957493782043, "learning_rate": 0.01, "loss": 2.2431, "step": 3981 }, { "epoch": 0.40932908661255524, "grad_norm": 0.05527504161000252, "learning_rate": 0.01, "loss": 2.2649, "step": 3984 }, { "epoch": 0.40963731634645023, "grad_norm": 0.08060980588197708, "learning_rate": 0.01, "loss": 2.2708, "step": 3987 }, { "epoch": 0.4099455460803452, "grad_norm": 0.05611690506339073, "learning_rate": 0.01, "loss": 2.2683, "step": 3990 }, { "epoch": 0.4102537758142402, "grad_norm": 0.08760816603899002, "learning_rate": 0.01, "loss": 2.2392, "step": 3993 }, { "epoch": 0.4105620055481352, "grad_norm": 0.07327746599912643, "learning_rate": 0.01, "loss": 2.2587, "step": 3996 }, { "epoch": 0.4108702352820302, "grad_norm": 0.05924748629331589, "learning_rate": 0.01, "loss": 2.2435, "step": 3999 }, { "epoch": 0.4111784650159252, "grad_norm": 0.08269370347261429, "learning_rate": 0.01, "loss": 2.2365, "step": 4002 }, { "epoch": 0.4114866947498202, "grad_norm": 0.06834371387958527, "learning_rate": 0.01, "loss": 2.2579, "step": 4005 }, { "epoch": 0.4117949244837152, "grad_norm": 0.06737885624170303, "learning_rate": 0.01, "loss": 2.2585, "step": 4008 }, { "epoch": 0.4121031542176102, "grad_norm": 0.0919148176908493, "learning_rate": 0.01, "loss": 2.2524, "step": 4011 }, { "epoch": 0.4124113839515052, "grad_norm": 0.0744348093867302, "learning_rate": 0.01, "loss": 2.2328, "step": 4014 }, { "epoch": 0.41271961368540017, "grad_norm": 0.08952994644641876, "learning_rate": 0.01, "loss": 2.2556, "step": 4017 }, { "epoch": 0.41302784341929516, "grad_norm": 0.054230738431215286, "learning_rate": 0.01, "loss": 2.2559, "step": 4020 }, { "epoch": 0.41333607315319015, "grad_norm": 0.11185753345489502, "learning_rate": 0.01, "loss": 2.2599, "step": 4023 }, { "epoch": 0.41364430288708515, "grad_norm": 0.11211541295051575, "learning_rate": 0.01, "loss": 2.2456, "step": 4026 }, { "epoch": 0.4139525326209802, "grad_norm": 0.08211257308721542, "learning_rate": 0.01, "loss": 2.2636, "step": 4029 }, { "epoch": 0.4142607623548752, "grad_norm": 0.07233046740293503, "learning_rate": 0.01, "loss": 2.2148, "step": 4032 }, { "epoch": 0.4145689920887702, "grad_norm": 0.1062379851937294, "learning_rate": 0.01, "loss": 2.2382, "step": 4035 }, { "epoch": 0.4148772218226652, "grad_norm": 0.07079877704381943, "learning_rate": 0.01, "loss": 2.2462, "step": 4038 }, { "epoch": 0.41518545155656017, "grad_norm": 0.04237307608127594, "learning_rate": 0.01, "loss": 2.2523, "step": 4041 }, { "epoch": 0.41549368129045516, "grad_norm": 0.12513239681720734, "learning_rate": 0.01, "loss": 2.2614, "step": 4044 }, { "epoch": 0.41580191102435016, "grad_norm": 0.07134360820055008, "learning_rate": 0.01, "loss": 2.2533, "step": 4047 }, { "epoch": 0.41611014075824515, "grad_norm": 0.07371515780687332, "learning_rate": 0.01, "loss": 2.2333, "step": 4050 }, { "epoch": 0.41641837049214014, "grad_norm": 0.05744464695453644, "learning_rate": 0.01, "loss": 2.2292, "step": 4053 }, { "epoch": 0.41672660022603514, "grad_norm": 0.0790088102221489, "learning_rate": 0.01, "loss": 2.2217, "step": 4056 }, { "epoch": 0.41703482995993013, "grad_norm": 0.12540112435817719, "learning_rate": 0.01, "loss": 2.2367, "step": 4059 }, { "epoch": 0.4173430596938251, "grad_norm": 0.06895852833986282, "learning_rate": 0.01, "loss": 2.2354, "step": 4062 }, { "epoch": 0.4176512894277201, "grad_norm": 0.09068478643894196, "learning_rate": 0.01, "loss": 2.2605, "step": 4065 }, { "epoch": 0.4179595191616151, "grad_norm": 0.051881443709135056, "learning_rate": 0.01, "loss": 2.2501, "step": 4068 }, { "epoch": 0.4182677488955101, "grad_norm": 0.20433951914310455, "learning_rate": 0.01, "loss": 2.2582, "step": 4071 }, { "epoch": 0.4185759786294051, "grad_norm": 0.08301309496164322, "learning_rate": 0.01, "loss": 2.2424, "step": 4074 }, { "epoch": 0.4188842083633001, "grad_norm": 0.07062964886426926, "learning_rate": 0.01, "loss": 2.2345, "step": 4077 }, { "epoch": 0.4191924380971951, "grad_norm": 0.09770773351192474, "learning_rate": 0.01, "loss": 2.264, "step": 4080 }, { "epoch": 0.4195006678310901, "grad_norm": 0.0847458690404892, "learning_rate": 0.01, "loss": 2.2329, "step": 4083 }, { "epoch": 0.41980889756498513, "grad_norm": 0.06491915881633759, "learning_rate": 0.01, "loss": 2.2174, "step": 4086 }, { "epoch": 0.4201171272988801, "grad_norm": 0.11355047672986984, "learning_rate": 0.01, "loss": 2.2653, "step": 4089 }, { "epoch": 0.4204253570327751, "grad_norm": 0.10509520024061203, "learning_rate": 0.01, "loss": 2.2435, "step": 4092 }, { "epoch": 0.4207335867666701, "grad_norm": 0.07456620037555695, "learning_rate": 0.01, "loss": 2.2348, "step": 4095 }, { "epoch": 0.4210418165005651, "grad_norm": 0.07531027495861053, "learning_rate": 0.01, "loss": 2.2524, "step": 4098 }, { "epoch": 0.4213500462344601, "grad_norm": 0.06129564717411995, "learning_rate": 0.01, "loss": 2.2577, "step": 4101 }, { "epoch": 0.4216582759683551, "grad_norm": 0.03984616696834564, "learning_rate": 0.01, "loss": 2.2354, "step": 4104 }, { "epoch": 0.4219665057022501, "grad_norm": 0.1273418813943863, "learning_rate": 0.01, "loss": 2.2478, "step": 4107 }, { "epoch": 0.4222747354361451, "grad_norm": 0.08859774470329285, "learning_rate": 0.01, "loss": 2.2504, "step": 4110 }, { "epoch": 0.42258296517004007, "grad_norm": 0.10512147098779678, "learning_rate": 0.01, "loss": 2.2435, "step": 4113 }, { "epoch": 0.42289119490393506, "grad_norm": 0.11181578040122986, "learning_rate": 0.01, "loss": 2.2396, "step": 4116 }, { "epoch": 0.42319942463783006, "grad_norm": 0.07474307715892792, "learning_rate": 0.01, "loss": 2.2518, "step": 4119 }, { "epoch": 0.42350765437172505, "grad_norm": 0.07233690470457077, "learning_rate": 0.01, "loss": 2.2283, "step": 4122 }, { "epoch": 0.42381588410562004, "grad_norm": 0.06051602587103844, "learning_rate": 0.01, "loss": 2.2429, "step": 4125 }, { "epoch": 0.42412411383951504, "grad_norm": 0.0492120198905468, "learning_rate": 0.01, "loss": 2.2312, "step": 4128 }, { "epoch": 0.42443234357341003, "grad_norm": 0.07249493151903152, "learning_rate": 0.01, "loss": 2.244, "step": 4131 }, { "epoch": 0.424740573307305, "grad_norm": 0.0993468165397644, "learning_rate": 0.01, "loss": 2.2441, "step": 4134 }, { "epoch": 0.4250488030412, "grad_norm": 0.07051920145750046, "learning_rate": 0.01, "loss": 2.2188, "step": 4137 }, { "epoch": 0.425357032775095, "grad_norm": 0.08267249912023544, "learning_rate": 0.01, "loss": 2.2472, "step": 4140 }, { "epoch": 0.42566526250899006, "grad_norm": 0.1307336390018463, "learning_rate": 0.01, "loss": 2.2359, "step": 4143 }, { "epoch": 0.42597349224288505, "grad_norm": 0.09383214265108109, "learning_rate": 0.01, "loss": 2.2519, "step": 4146 }, { "epoch": 0.42628172197678005, "grad_norm": 0.08928582817316055, "learning_rate": 0.01, "loss": 2.2322, "step": 4149 }, { "epoch": 0.42658995171067504, "grad_norm": 0.10554556548595428, "learning_rate": 0.01, "loss": 2.2219, "step": 4152 }, { "epoch": 0.42689818144457004, "grad_norm": 0.06501816213130951, "learning_rate": 0.01, "loss": 2.2351, "step": 4155 }, { "epoch": 0.42720641117846503, "grad_norm": 0.10736589878797531, "learning_rate": 0.01, "loss": 2.2327, "step": 4158 }, { "epoch": 0.42751464091236, "grad_norm": 0.11834681034088135, "learning_rate": 0.01, "loss": 2.2617, "step": 4161 }, { "epoch": 0.427822870646255, "grad_norm": 0.07011161744594574, "learning_rate": 0.01, "loss": 2.2218, "step": 4164 }, { "epoch": 0.42813110038015, "grad_norm": 0.0653071179986, "learning_rate": 0.01, "loss": 2.2115, "step": 4167 }, { "epoch": 0.428439330114045, "grad_norm": 0.057517893612384796, "learning_rate": 0.01, "loss": 2.2533, "step": 4170 }, { "epoch": 0.42874755984794, "grad_norm": 0.060261376202106476, "learning_rate": 0.01, "loss": 2.2199, "step": 4173 }, { "epoch": 0.429055789581835, "grad_norm": 0.12384762614965439, "learning_rate": 0.01, "loss": 2.2124, "step": 4176 }, { "epoch": 0.42936401931573, "grad_norm": 0.06436473876237869, "learning_rate": 0.01, "loss": 2.2558, "step": 4179 }, { "epoch": 0.429672249049625, "grad_norm": 0.049704987555742264, "learning_rate": 0.01, "loss": 2.2434, "step": 4182 }, { "epoch": 0.42998047878351997, "grad_norm": 0.0809103325009346, "learning_rate": 0.01, "loss": 2.2461, "step": 4185 }, { "epoch": 0.43028870851741496, "grad_norm": 0.04888701066374779, "learning_rate": 0.01, "loss": 2.2342, "step": 4188 }, { "epoch": 0.43059693825130996, "grad_norm": 0.04951067641377449, "learning_rate": 0.01, "loss": 2.2292, "step": 4191 }, { "epoch": 0.43090516798520495, "grad_norm": 0.13740333914756775, "learning_rate": 0.01, "loss": 2.2243, "step": 4194 }, { "epoch": 0.43121339771909994, "grad_norm": 0.09912848472595215, "learning_rate": 0.01, "loss": 2.2065, "step": 4197 }, { "epoch": 0.43152162745299494, "grad_norm": 0.1031954362988472, "learning_rate": 0.01, "loss": 2.2247, "step": 4200 }, { "epoch": 0.43182985718689, "grad_norm": 0.04378229379653931, "learning_rate": 0.01, "loss": 2.2485, "step": 4203 }, { "epoch": 0.432138086920785, "grad_norm": 0.05430865287780762, "learning_rate": 0.01, "loss": 2.2178, "step": 4206 }, { "epoch": 0.43244631665468, "grad_norm": 0.05675321817398071, "learning_rate": 0.01, "loss": 2.2568, "step": 4209 }, { "epoch": 0.43275454638857497, "grad_norm": 0.07637004554271698, "learning_rate": 0.01, "loss": 2.2567, "step": 4212 }, { "epoch": 0.43306277612246996, "grad_norm": 0.06263475120067596, "learning_rate": 0.01, "loss": 2.2597, "step": 4215 }, { "epoch": 0.43337100585636495, "grad_norm": 0.09689760208129883, "learning_rate": 0.01, "loss": 2.2376, "step": 4218 }, { "epoch": 0.43367923559025995, "grad_norm": 0.13923399150371552, "learning_rate": 0.01, "loss": 2.2394, "step": 4221 }, { "epoch": 0.43398746532415494, "grad_norm": 0.0607299767434597, "learning_rate": 0.01, "loss": 2.2366, "step": 4224 }, { "epoch": 0.43429569505804994, "grad_norm": 0.05221550166606903, "learning_rate": 0.01, "loss": 2.2587, "step": 4227 }, { "epoch": 0.43460392479194493, "grad_norm": 0.05556831881403923, "learning_rate": 0.01, "loss": 2.2422, "step": 4230 }, { "epoch": 0.4349121545258399, "grad_norm": 0.0843261182308197, "learning_rate": 0.01, "loss": 2.2399, "step": 4233 }, { "epoch": 0.4352203842597349, "grad_norm": 0.08864692598581314, "learning_rate": 0.01, "loss": 2.2155, "step": 4236 }, { "epoch": 0.4355286139936299, "grad_norm": 0.11530198156833649, "learning_rate": 0.01, "loss": 2.2612, "step": 4239 }, { "epoch": 0.4358368437275249, "grad_norm": 0.11549337208271027, "learning_rate": 0.01, "loss": 2.2233, "step": 4242 }, { "epoch": 0.4361450734614199, "grad_norm": 0.11105350404977798, "learning_rate": 0.01, "loss": 2.2426, "step": 4245 }, { "epoch": 0.4364533031953149, "grad_norm": 0.1190980076789856, "learning_rate": 0.01, "loss": 2.2353, "step": 4248 }, { "epoch": 0.4367615329292099, "grad_norm": 0.08560021221637726, "learning_rate": 0.01, "loss": 2.2542, "step": 4251 }, { "epoch": 0.4370697626631049, "grad_norm": 0.05514337494969368, "learning_rate": 0.01, "loss": 2.2171, "step": 4254 }, { "epoch": 0.43737799239699987, "grad_norm": 0.06764981150627136, "learning_rate": 0.01, "loss": 2.2363, "step": 4257 }, { "epoch": 0.4376862221308949, "grad_norm": 0.04801105335354805, "learning_rate": 0.01, "loss": 2.2352, "step": 4260 }, { "epoch": 0.4379944518647899, "grad_norm": 0.04782482981681824, "learning_rate": 0.01, "loss": 2.2458, "step": 4263 }, { "epoch": 0.4383026815986849, "grad_norm": 0.12880820035934448, "learning_rate": 0.01, "loss": 2.2384, "step": 4266 }, { "epoch": 0.4386109113325799, "grad_norm": 0.06714754551649094, "learning_rate": 0.01, "loss": 2.2214, "step": 4269 }, { "epoch": 0.4389191410664749, "grad_norm": 0.08878037333488464, "learning_rate": 0.01, "loss": 2.2597, "step": 4272 }, { "epoch": 0.4392273708003699, "grad_norm": 0.051335882395505905, "learning_rate": 0.01, "loss": 2.2065, "step": 4275 }, { "epoch": 0.4395356005342649, "grad_norm": 0.058174654841423035, "learning_rate": 0.01, "loss": 2.246, "step": 4278 }, { "epoch": 0.4398438302681599, "grad_norm": 0.053695593029260635, "learning_rate": 0.01, "loss": 2.2406, "step": 4281 }, { "epoch": 0.44015206000205487, "grad_norm": 0.07685926556587219, "learning_rate": 0.01, "loss": 2.2212, "step": 4284 }, { "epoch": 0.44046028973594986, "grad_norm": 0.13495223224163055, "learning_rate": 0.01, "loss": 2.2486, "step": 4287 }, { "epoch": 0.44076851946984485, "grad_norm": 0.0707453116774559, "learning_rate": 0.01, "loss": 2.247, "step": 4290 }, { "epoch": 0.44107674920373985, "grad_norm": 0.04909240081906319, "learning_rate": 0.01, "loss": 2.2528, "step": 4293 }, { "epoch": 0.44138497893763484, "grad_norm": 0.06148238107562065, "learning_rate": 0.01, "loss": 2.2462, "step": 4296 }, { "epoch": 0.44169320867152984, "grad_norm": 0.07306285202503204, "learning_rate": 0.01, "loss": 2.199, "step": 4299 }, { "epoch": 0.44200143840542483, "grad_norm": 0.12965865433216095, "learning_rate": 0.01, "loss": 2.2156, "step": 4302 }, { "epoch": 0.4423096681393198, "grad_norm": 0.059606775641441345, "learning_rate": 0.01, "loss": 2.2209, "step": 4305 }, { "epoch": 0.4426178978732148, "grad_norm": 0.06866457313299179, "learning_rate": 0.01, "loss": 2.2508, "step": 4308 }, { "epoch": 0.4429261276071098, "grad_norm": 0.08940677344799042, "learning_rate": 0.01, "loss": 2.2244, "step": 4311 }, { "epoch": 0.4432343573410048, "grad_norm": 0.10428988933563232, "learning_rate": 0.01, "loss": 2.2106, "step": 4314 }, { "epoch": 0.44354258707489985, "grad_norm": 0.1565064787864685, "learning_rate": 0.01, "loss": 2.2745, "step": 4317 }, { "epoch": 0.44385081680879485, "grad_norm": 0.11433500796556473, "learning_rate": 0.01, "loss": 2.2655, "step": 4320 }, { "epoch": 0.44415904654268984, "grad_norm": 0.07315809279680252, "learning_rate": 0.01, "loss": 2.2523, "step": 4323 }, { "epoch": 0.44446727627658483, "grad_norm": 0.048583708703517914, "learning_rate": 0.01, "loss": 2.2345, "step": 4326 }, { "epoch": 0.4447755060104798, "grad_norm": 0.03422848507761955, "learning_rate": 0.01, "loss": 2.1883, "step": 4329 }, { "epoch": 0.4450837357443748, "grad_norm": 0.05057518929243088, "learning_rate": 0.01, "loss": 2.2288, "step": 4332 }, { "epoch": 0.4453919654782698, "grad_norm": 0.10407044738531113, "learning_rate": 0.01, "loss": 2.1974, "step": 4335 }, { "epoch": 0.4457001952121648, "grad_norm": 0.06545260548591614, "learning_rate": 0.01, "loss": 2.2121, "step": 4338 }, { "epoch": 0.4460084249460598, "grad_norm": 0.09442485123872757, "learning_rate": 0.01, "loss": 2.2145, "step": 4341 }, { "epoch": 0.4463166546799548, "grad_norm": 0.11353209614753723, "learning_rate": 0.01, "loss": 2.227, "step": 4344 }, { "epoch": 0.4466248844138498, "grad_norm": 0.11243279278278351, "learning_rate": 0.01, "loss": 2.242, "step": 4347 }, { "epoch": 0.4469331141477448, "grad_norm": 0.14264856278896332, "learning_rate": 0.01, "loss": 2.2405, "step": 4350 }, { "epoch": 0.4472413438816398, "grad_norm": 0.048186566680669785, "learning_rate": 0.01, "loss": 2.1921, "step": 4353 }, { "epoch": 0.44754957361553477, "grad_norm": 0.0693448930978775, "learning_rate": 0.01, "loss": 2.2404, "step": 4356 }, { "epoch": 0.44785780334942976, "grad_norm": 0.04426461458206177, "learning_rate": 0.01, "loss": 2.2114, "step": 4359 }, { "epoch": 0.44816603308332476, "grad_norm": 0.06392990797758102, "learning_rate": 0.01, "loss": 2.224, "step": 4362 }, { "epoch": 0.44847426281721975, "grad_norm": 0.16224262118339539, "learning_rate": 0.01, "loss": 2.261, "step": 4365 }, { "epoch": 0.44878249255111474, "grad_norm": 0.06382444500923157, "learning_rate": 0.01, "loss": 2.2067, "step": 4368 }, { "epoch": 0.44909072228500974, "grad_norm": 0.09267281740903854, "learning_rate": 0.01, "loss": 2.2403, "step": 4371 }, { "epoch": 0.44939895201890473, "grad_norm": 0.09785914421081543, "learning_rate": 0.01, "loss": 2.2276, "step": 4374 }, { "epoch": 0.4497071817527998, "grad_norm": 0.06673259288072586, "learning_rate": 0.01, "loss": 2.214, "step": 4377 }, { "epoch": 0.45001541148669477, "grad_norm": 0.05463524907827377, "learning_rate": 0.01, "loss": 2.2048, "step": 4380 }, { "epoch": 0.45032364122058977, "grad_norm": 0.05466567724943161, "learning_rate": 0.01, "loss": 2.2062, "step": 4383 }, { "epoch": 0.45063187095448476, "grad_norm": 0.07413290441036224, "learning_rate": 0.01, "loss": 2.2178, "step": 4386 }, { "epoch": 0.45094010068837975, "grad_norm": 0.06564678996801376, "learning_rate": 0.01, "loss": 2.2304, "step": 4389 }, { "epoch": 0.45124833042227475, "grad_norm": 0.12468644231557846, "learning_rate": 0.01, "loss": 2.2301, "step": 4392 }, { "epoch": 0.45155656015616974, "grad_norm": 0.06898069381713867, "learning_rate": 0.01, "loss": 2.2255, "step": 4395 }, { "epoch": 0.45186478989006473, "grad_norm": 0.13579058647155762, "learning_rate": 0.01, "loss": 2.2021, "step": 4398 }, { "epoch": 0.4521730196239597, "grad_norm": 0.07980421930551529, "learning_rate": 0.01, "loss": 2.2598, "step": 4401 }, { "epoch": 0.4524812493578547, "grad_norm": 0.07771994173526764, "learning_rate": 0.01, "loss": 2.2166, "step": 4404 }, { "epoch": 0.4527894790917497, "grad_norm": 0.08967602998018265, "learning_rate": 0.01, "loss": 2.2095, "step": 4407 }, { "epoch": 0.4530977088256447, "grad_norm": 0.10909977555274963, "learning_rate": 0.01, "loss": 2.2064, "step": 4410 }, { "epoch": 0.4534059385595397, "grad_norm": 0.11167363077402115, "learning_rate": 0.01, "loss": 2.2021, "step": 4413 }, { "epoch": 0.4537141682934347, "grad_norm": 0.10310694575309753, "learning_rate": 0.01, "loss": 2.2582, "step": 4416 }, { "epoch": 0.4540223980273297, "grad_norm": 0.06411474943161011, "learning_rate": 0.01, "loss": 2.2203, "step": 4419 }, { "epoch": 0.4543306277612247, "grad_norm": 0.11141805350780487, "learning_rate": 0.01, "loss": 2.2163, "step": 4422 }, { "epoch": 0.4546388574951197, "grad_norm": 0.09054200351238251, "learning_rate": 0.01, "loss": 2.2054, "step": 4425 }, { "epoch": 0.45494708722901467, "grad_norm": 0.06952405720949173, "learning_rate": 0.01, "loss": 2.2488, "step": 4428 }, { "epoch": 0.45525531696290966, "grad_norm": 0.08597440272569656, "learning_rate": 0.01, "loss": 2.2044, "step": 4431 }, { "epoch": 0.4555635466968047, "grad_norm": 0.06718187034130096, "learning_rate": 0.01, "loss": 2.2419, "step": 4434 }, { "epoch": 0.4558717764306997, "grad_norm": 0.0558515265583992, "learning_rate": 0.01, "loss": 2.2102, "step": 4437 }, { "epoch": 0.4561800061645947, "grad_norm": 0.0560682937502861, "learning_rate": 0.01, "loss": 2.2324, "step": 4440 }, { "epoch": 0.4564882358984897, "grad_norm": 0.058881547302007675, "learning_rate": 0.01, "loss": 2.1966, "step": 4443 }, { "epoch": 0.4567964656323847, "grad_norm": 0.07034582644701004, "learning_rate": 0.01, "loss": 2.2021, "step": 4446 }, { "epoch": 0.4571046953662797, "grad_norm": 0.09703799337148666, "learning_rate": 0.01, "loss": 2.21, "step": 4449 }, { "epoch": 0.45741292510017467, "grad_norm": 0.06268820911645889, "learning_rate": 0.01, "loss": 2.2237, "step": 4452 }, { "epoch": 0.45772115483406967, "grad_norm": 0.123359814286232, "learning_rate": 0.01, "loss": 2.2063, "step": 4455 }, { "epoch": 0.45802938456796466, "grad_norm": 0.0536644384264946, "learning_rate": 0.01, "loss": 2.2002, "step": 4458 }, { "epoch": 0.45833761430185965, "grad_norm": 0.0957527682185173, "learning_rate": 0.01, "loss": 2.2484, "step": 4461 }, { "epoch": 0.45864584403575465, "grad_norm": 0.12607458233833313, "learning_rate": 0.01, "loss": 2.2241, "step": 4464 }, { "epoch": 0.45895407376964964, "grad_norm": 0.07415255159139633, "learning_rate": 0.01, "loss": 2.2083, "step": 4467 }, { "epoch": 0.45926230350354463, "grad_norm": 0.10248073190450668, "learning_rate": 0.01, "loss": 2.2253, "step": 4470 }, { "epoch": 0.4595705332374396, "grad_norm": 0.05264243111014366, "learning_rate": 0.01, "loss": 2.2166, "step": 4473 }, { "epoch": 0.4598787629713346, "grad_norm": 0.0557783767580986, "learning_rate": 0.01, "loss": 2.2213, "step": 4476 }, { "epoch": 0.4601869927052296, "grad_norm": 0.06835830211639404, "learning_rate": 0.01, "loss": 2.2255, "step": 4479 }, { "epoch": 0.4604952224391246, "grad_norm": 0.12045460939407349, "learning_rate": 0.01, "loss": 2.2331, "step": 4482 }, { "epoch": 0.4608034521730196, "grad_norm": 0.11495090276002884, "learning_rate": 0.01, "loss": 2.2191, "step": 4485 }, { "epoch": 0.4611116819069146, "grad_norm": 0.07859046757221222, "learning_rate": 0.01, "loss": 2.2282, "step": 4488 }, { "epoch": 0.46141991164080964, "grad_norm": 0.03789819777011871, "learning_rate": 0.01, "loss": 2.2188, "step": 4491 }, { "epoch": 0.46172814137470464, "grad_norm": 0.03617655113339424, "learning_rate": 0.01, "loss": 2.2496, "step": 4494 }, { "epoch": 0.46203637110859963, "grad_norm": 0.06894705444574356, "learning_rate": 0.01, "loss": 2.2007, "step": 4497 }, { "epoch": 0.4623446008424946, "grad_norm": 0.1143706887960434, "learning_rate": 0.01, "loss": 2.2247, "step": 4500 }, { "epoch": 0.4626528305763896, "grad_norm": 0.10069230943918228, "learning_rate": 0.01, "loss": 2.2114, "step": 4503 }, { "epoch": 0.4629610603102846, "grad_norm": 0.10068007558584213, "learning_rate": 0.01, "loss": 2.2438, "step": 4506 }, { "epoch": 0.4632692900441796, "grad_norm": 0.05319290608167648, "learning_rate": 0.01, "loss": 2.2422, "step": 4509 }, { "epoch": 0.4635775197780746, "grad_norm": 0.06933122128248215, "learning_rate": 0.01, "loss": 2.2059, "step": 4512 }, { "epoch": 0.4638857495119696, "grad_norm": 0.11921056360006332, "learning_rate": 0.01, "loss": 2.2137, "step": 4515 }, { "epoch": 0.4641939792458646, "grad_norm": 0.06092121824622154, "learning_rate": 0.01, "loss": 2.1941, "step": 4518 }, { "epoch": 0.4645022089797596, "grad_norm": 0.06017937511205673, "learning_rate": 0.01, "loss": 2.2539, "step": 4521 }, { "epoch": 0.4648104387136546, "grad_norm": 0.05721915140748024, "learning_rate": 0.01, "loss": 2.2348, "step": 4524 }, { "epoch": 0.46511866844754957, "grad_norm": 0.07706714421510696, "learning_rate": 0.01, "loss": 2.2169, "step": 4527 }, { "epoch": 0.46542689818144456, "grad_norm": 0.07279779762029648, "learning_rate": 0.01, "loss": 2.2163, "step": 4530 }, { "epoch": 0.46573512791533955, "grad_norm": 0.06781268864870071, "learning_rate": 0.01, "loss": 2.1682, "step": 4533 }, { "epoch": 0.46604335764923455, "grad_norm": 0.0807657316327095, "learning_rate": 0.01, "loss": 2.2123, "step": 4536 }, { "epoch": 0.46635158738312954, "grad_norm": 0.06467099487781525, "learning_rate": 0.01, "loss": 2.2152, "step": 4539 }, { "epoch": 0.46665981711702453, "grad_norm": 0.10680168867111206, "learning_rate": 0.01, "loss": 2.2062, "step": 4542 }, { "epoch": 0.4669680468509195, "grad_norm": 0.11668167263269424, "learning_rate": 0.01, "loss": 2.206, "step": 4545 }, { "epoch": 0.4672762765848146, "grad_norm": 0.06468226760625839, "learning_rate": 0.01, "loss": 2.2011, "step": 4548 }, { "epoch": 0.46758450631870957, "grad_norm": 0.07668601721525192, "learning_rate": 0.01, "loss": 2.2128, "step": 4551 }, { "epoch": 0.46789273605260456, "grad_norm": 0.05631673336029053, "learning_rate": 0.01, "loss": 2.1812, "step": 4554 }, { "epoch": 0.46820096578649956, "grad_norm": 0.12898530066013336, "learning_rate": 0.01, "loss": 2.2312, "step": 4557 }, { "epoch": 0.46850919552039455, "grad_norm": 0.07105603069067001, "learning_rate": 0.01, "loss": 2.1949, "step": 4560 }, { "epoch": 0.46881742525428954, "grad_norm": 0.07172367721796036, "learning_rate": 0.01, "loss": 2.2509, "step": 4563 }, { "epoch": 0.46912565498818454, "grad_norm": 0.1219574511051178, "learning_rate": 0.01, "loss": 2.2147, "step": 4566 }, { "epoch": 0.46943388472207953, "grad_norm": 0.05777307227253914, "learning_rate": 0.01, "loss": 2.2071, "step": 4569 }, { "epoch": 0.4697421144559745, "grad_norm": 0.12805253267288208, "learning_rate": 0.01, "loss": 2.2166, "step": 4572 }, { "epoch": 0.4700503441898695, "grad_norm": 0.11360877752304077, "learning_rate": 0.01, "loss": 2.1827, "step": 4575 }, { "epoch": 0.4703585739237645, "grad_norm": 0.07203348726034164, "learning_rate": 0.01, "loss": 2.2378, "step": 4578 }, { "epoch": 0.4706668036576595, "grad_norm": 0.05645303055644035, "learning_rate": 0.01, "loss": 2.2044, "step": 4581 }, { "epoch": 0.4709750333915545, "grad_norm": 0.06103040650486946, "learning_rate": 0.01, "loss": 2.2302, "step": 4584 }, { "epoch": 0.4712832631254495, "grad_norm": 0.0621771402657032, "learning_rate": 0.01, "loss": 2.2147, "step": 4587 }, { "epoch": 0.4715914928593445, "grad_norm": 0.08458666503429413, "learning_rate": 0.01, "loss": 2.1781, "step": 4590 }, { "epoch": 0.4718997225932395, "grad_norm": 0.092729851603508, "learning_rate": 0.01, "loss": 2.2326, "step": 4593 }, { "epoch": 0.4722079523271345, "grad_norm": 0.09255766123533249, "learning_rate": 0.01, "loss": 2.2082, "step": 4596 }, { "epoch": 0.47251618206102947, "grad_norm": 0.11929985135793686, "learning_rate": 0.01, "loss": 2.2064, "step": 4599 }, { "epoch": 0.47282441179492446, "grad_norm": 0.12234004586935043, "learning_rate": 0.01, "loss": 2.1513, "step": 4602 }, { "epoch": 0.47313264152881945, "grad_norm": 0.07648742944002151, "learning_rate": 0.01, "loss": 2.2376, "step": 4605 }, { "epoch": 0.4734408712627145, "grad_norm": 0.05717691034078598, "learning_rate": 0.01, "loss": 2.231, "step": 4608 }, { "epoch": 0.4737491009966095, "grad_norm": 0.048224568367004395, "learning_rate": 0.01, "loss": 2.2126, "step": 4611 }, { "epoch": 0.4740573307305045, "grad_norm": 0.07530826330184937, "learning_rate": 0.01, "loss": 2.2155, "step": 4614 }, { "epoch": 0.4743655604643995, "grad_norm": 0.08617862313985825, "learning_rate": 0.01, "loss": 2.2286, "step": 4617 }, { "epoch": 0.4746737901982945, "grad_norm": 0.10041820257902145, "learning_rate": 0.01, "loss": 2.1917, "step": 4620 }, { "epoch": 0.47498201993218947, "grad_norm": 0.04470205307006836, "learning_rate": 0.01, "loss": 2.2188, "step": 4623 }, { "epoch": 0.47529024966608446, "grad_norm": 0.060269374400377274, "learning_rate": 0.01, "loss": 2.2267, "step": 4626 }, { "epoch": 0.47559847939997946, "grad_norm": 0.06320520490407944, "learning_rate": 0.01, "loss": 2.2054, "step": 4629 }, { "epoch": 0.47590670913387445, "grad_norm": 0.05642838776111603, "learning_rate": 0.01, "loss": 2.2062, "step": 4632 }, { "epoch": 0.47621493886776944, "grad_norm": 0.064301997423172, "learning_rate": 0.01, "loss": 2.2296, "step": 4635 }, { "epoch": 0.47652316860166444, "grad_norm": 0.07448214292526245, "learning_rate": 0.01, "loss": 2.197, "step": 4638 }, { "epoch": 0.47683139833555943, "grad_norm": 0.08586326986551285, "learning_rate": 0.01, "loss": 2.1743, "step": 4641 }, { "epoch": 0.4771396280694544, "grad_norm": 0.13179326057434082, "learning_rate": 0.01, "loss": 2.2299, "step": 4644 }, { "epoch": 0.4774478578033494, "grad_norm": 0.1163720041513443, "learning_rate": 0.01, "loss": 2.2089, "step": 4647 }, { "epoch": 0.4777560875372444, "grad_norm": 0.04846031963825226, "learning_rate": 0.01, "loss": 2.1564, "step": 4650 }, { "epoch": 0.4780643172711394, "grad_norm": 0.13724131882190704, "learning_rate": 0.01, "loss": 2.2078, "step": 4653 }, { "epoch": 0.4783725470050344, "grad_norm": 0.062840536236763, "learning_rate": 0.01, "loss": 2.2252, "step": 4656 }, { "epoch": 0.4786807767389294, "grad_norm": 0.06721820682287216, "learning_rate": 0.01, "loss": 2.1781, "step": 4659 }, { "epoch": 0.4789890064728244, "grad_norm": 0.09086044877767563, "learning_rate": 0.01, "loss": 2.2179, "step": 4662 }, { "epoch": 0.47929723620671943, "grad_norm": 0.07732655107975006, "learning_rate": 0.01, "loss": 2.2334, "step": 4665 }, { "epoch": 0.47960546594061443, "grad_norm": 0.04763714596629143, "learning_rate": 0.01, "loss": 2.2262, "step": 4668 }, { "epoch": 0.4799136956745094, "grad_norm": 0.09649144858121872, "learning_rate": 0.01, "loss": 2.2141, "step": 4671 }, { "epoch": 0.4802219254084044, "grad_norm": 0.05458167567849159, "learning_rate": 0.01, "loss": 2.1967, "step": 4674 }, { "epoch": 0.4805301551422994, "grad_norm": 0.08577650040388107, "learning_rate": 0.01, "loss": 2.2183, "step": 4677 }, { "epoch": 0.4808383848761944, "grad_norm": 0.0733698159456253, "learning_rate": 0.01, "loss": 2.2185, "step": 4680 }, { "epoch": 0.4811466146100894, "grad_norm": 0.06648692488670349, "learning_rate": 0.01, "loss": 2.1904, "step": 4683 }, { "epoch": 0.4814548443439844, "grad_norm": 0.08376996219158173, "learning_rate": 0.01, "loss": 2.2097, "step": 4686 }, { "epoch": 0.4817630740778794, "grad_norm": 0.05270134285092354, "learning_rate": 0.01, "loss": 2.2304, "step": 4689 }, { "epoch": 0.4820713038117744, "grad_norm": 0.05531509965658188, "learning_rate": 0.01, "loss": 2.2039, "step": 4692 }, { "epoch": 0.48237953354566937, "grad_norm": 0.05848492309451103, "learning_rate": 0.01, "loss": 2.2113, "step": 4695 }, { "epoch": 0.48268776327956436, "grad_norm": 0.06692120432853699, "learning_rate": 0.01, "loss": 2.1972, "step": 4698 }, { "epoch": 0.48299599301345936, "grad_norm": 0.07243851572275162, "learning_rate": 0.01, "loss": 2.223, "step": 4701 }, { "epoch": 0.48330422274735435, "grad_norm": 0.06565523892641068, "learning_rate": 0.01, "loss": 2.1913, "step": 4704 }, { "epoch": 0.48361245248124934, "grad_norm": 0.04595122113823891, "learning_rate": 0.01, "loss": 2.1782, "step": 4707 }, { "epoch": 0.48392068221514434, "grad_norm": 0.06658844649791718, "learning_rate": 0.01, "loss": 2.224, "step": 4710 }, { "epoch": 0.48422891194903933, "grad_norm": 0.0807071253657341, "learning_rate": 0.01, "loss": 2.217, "step": 4713 }, { "epoch": 0.4845371416829343, "grad_norm": 0.0562782846391201, "learning_rate": 0.01, "loss": 2.2033, "step": 4716 }, { "epoch": 0.4848453714168293, "grad_norm": 0.07851718366146088, "learning_rate": 0.01, "loss": 2.1847, "step": 4719 }, { "epoch": 0.48515360115072437, "grad_norm": 0.07649900764226913, "learning_rate": 0.01, "loss": 2.2222, "step": 4722 }, { "epoch": 0.48546183088461936, "grad_norm": 0.07279150187969208, "learning_rate": 0.01, "loss": 2.1951, "step": 4725 }, { "epoch": 0.48577006061851435, "grad_norm": 0.053628645837306976, "learning_rate": 0.01, "loss": 2.1681, "step": 4728 }, { "epoch": 0.48607829035240935, "grad_norm": 0.09401357173919678, "learning_rate": 0.01, "loss": 2.1943, "step": 4731 }, { "epoch": 0.48638652008630434, "grad_norm": 0.1156088337302208, "learning_rate": 0.01, "loss": 2.2317, "step": 4734 }, { "epoch": 0.48669474982019933, "grad_norm": 0.12672138214111328, "learning_rate": 0.01, "loss": 2.2085, "step": 4737 }, { "epoch": 0.48700297955409433, "grad_norm": 0.06799574196338654, "learning_rate": 0.01, "loss": 2.2161, "step": 4740 }, { "epoch": 0.4873112092879893, "grad_norm": 0.06479325145483017, "learning_rate": 0.01, "loss": 2.1663, "step": 4743 }, { "epoch": 0.4876194390218843, "grad_norm": 0.09143824130296707, "learning_rate": 0.01, "loss": 2.2193, "step": 4746 }, { "epoch": 0.4879276687557793, "grad_norm": 0.09262688457965851, "learning_rate": 0.01, "loss": 2.218, "step": 4749 }, { "epoch": 0.4882358984896743, "grad_norm": 0.11519678682088852, "learning_rate": 0.01, "loss": 2.1937, "step": 4752 }, { "epoch": 0.4885441282235693, "grad_norm": 0.07646415382623672, "learning_rate": 0.01, "loss": 2.2133, "step": 4755 }, { "epoch": 0.4888523579574643, "grad_norm": 0.08090809732675552, "learning_rate": 0.01, "loss": 2.193, "step": 4758 }, { "epoch": 0.4891605876913593, "grad_norm": 0.08812209218740463, "learning_rate": 0.01, "loss": 2.2215, "step": 4761 }, { "epoch": 0.4894688174252543, "grad_norm": 0.14427846670150757, "learning_rate": 0.01, "loss": 2.2115, "step": 4764 }, { "epoch": 0.48977704715914927, "grad_norm": 0.08065719902515411, "learning_rate": 0.01, "loss": 2.1861, "step": 4767 }, { "epoch": 0.49008527689304426, "grad_norm": 0.04888691008090973, "learning_rate": 0.01, "loss": 2.1911, "step": 4770 }, { "epoch": 0.49039350662693926, "grad_norm": 0.04742259159684181, "learning_rate": 0.01, "loss": 2.2152, "step": 4773 }, { "epoch": 0.49070173636083425, "grad_norm": 0.061714138835668564, "learning_rate": 0.01, "loss": 2.2009, "step": 4776 }, { "epoch": 0.49100996609472924, "grad_norm": 0.07582443952560425, "learning_rate": 0.01, "loss": 2.2189, "step": 4779 }, { "epoch": 0.4913181958286243, "grad_norm": 0.1390780359506607, "learning_rate": 0.01, "loss": 2.211, "step": 4782 }, { "epoch": 0.4916264255625193, "grad_norm": 0.03784565255045891, "learning_rate": 0.01, "loss": 2.2011, "step": 4785 }, { "epoch": 0.4919346552964143, "grad_norm": 0.07413594424724579, "learning_rate": 0.01, "loss": 2.2103, "step": 4788 }, { "epoch": 0.4922428850303093, "grad_norm": 0.09402404725551605, "learning_rate": 0.01, "loss": 2.1912, "step": 4791 }, { "epoch": 0.49255111476420427, "grad_norm": 0.0717400312423706, "learning_rate": 0.01, "loss": 2.1868, "step": 4794 }, { "epoch": 0.49285934449809926, "grad_norm": 0.05179424583911896, "learning_rate": 0.01, "loss": 2.2298, "step": 4797 }, { "epoch": 0.49316757423199425, "grad_norm": 0.12123433500528336, "learning_rate": 0.01, "loss": 2.2005, "step": 4800 }, { "epoch": 0.49347580396588925, "grad_norm": 0.04941033944487572, "learning_rate": 0.01, "loss": 2.2113, "step": 4803 }, { "epoch": 0.49378403369978424, "grad_norm": 0.10987304151058197, "learning_rate": 0.01, "loss": 2.209, "step": 4806 }, { "epoch": 0.49409226343367924, "grad_norm": 0.09235193580389023, "learning_rate": 0.01, "loss": 2.1967, "step": 4809 }, { "epoch": 0.49440049316757423, "grad_norm": 0.057354703545570374, "learning_rate": 0.01, "loss": 2.219, "step": 4812 }, { "epoch": 0.4947087229014692, "grad_norm": 0.04692654311656952, "learning_rate": 0.01, "loss": 2.173, "step": 4815 }, { "epoch": 0.4950169526353642, "grad_norm": 0.09447453171014786, "learning_rate": 0.01, "loss": 2.1806, "step": 4818 }, { "epoch": 0.4953251823692592, "grad_norm": 0.09967079013586044, "learning_rate": 0.01, "loss": 2.1809, "step": 4821 }, { "epoch": 0.4956334121031542, "grad_norm": 0.06462189555168152, "learning_rate": 0.01, "loss": 2.1922, "step": 4824 }, { "epoch": 0.4959416418370492, "grad_norm": 0.038030870258808136, "learning_rate": 0.01, "loss": 2.2239, "step": 4827 }, { "epoch": 0.4962498715709442, "grad_norm": 0.06828872114419937, "learning_rate": 0.01, "loss": 2.1881, "step": 4830 }, { "epoch": 0.4965581013048392, "grad_norm": 0.10087070614099503, "learning_rate": 0.01, "loss": 2.22, "step": 4833 }, { "epoch": 0.4968663310387342, "grad_norm": 0.07630455493927002, "learning_rate": 0.01, "loss": 2.188, "step": 4836 }, { "epoch": 0.4971745607726292, "grad_norm": 0.05040668696165085, "learning_rate": 0.01, "loss": 2.2012, "step": 4839 }, { "epoch": 0.4974827905065242, "grad_norm": 0.05160282924771309, "learning_rate": 0.01, "loss": 2.2119, "step": 4842 }, { "epoch": 0.4977910202404192, "grad_norm": 0.04949258640408516, "learning_rate": 0.01, "loss": 2.1959, "step": 4845 }, { "epoch": 0.4980992499743142, "grad_norm": 0.07766029983758926, "learning_rate": 0.01, "loss": 2.1896, "step": 4848 }, { "epoch": 0.4984074797082092, "grad_norm": 0.06274580955505371, "learning_rate": 0.01, "loss": 2.2014, "step": 4851 }, { "epoch": 0.4987157094421042, "grad_norm": 0.1071280762553215, "learning_rate": 0.01, "loss": 2.2045, "step": 4854 }, { "epoch": 0.4990239391759992, "grad_norm": 0.10645020008087158, "learning_rate": 0.01, "loss": 2.1895, "step": 4857 }, { "epoch": 0.4993321689098942, "grad_norm": 0.1151091679930687, "learning_rate": 0.01, "loss": 2.1954, "step": 4860 }, { "epoch": 0.4996403986437892, "grad_norm": 0.09699530899524689, "learning_rate": 0.01, "loss": 2.1833, "step": 4863 }, { "epoch": 0.49994862837768417, "grad_norm": 0.06568959355354309, "learning_rate": 0.01, "loss": 2.1862, "step": 4866 }, { "epoch": 0.5002568581115792, "grad_norm": 0.0421447716653347, "learning_rate": 0.01, "loss": 2.1819, "step": 4869 }, { "epoch": 0.5005650878454742, "grad_norm": 0.04529868811368942, "learning_rate": 0.01, "loss": 2.1852, "step": 4872 }, { "epoch": 0.5008733175793691, "grad_norm": 0.059541650116443634, "learning_rate": 0.01, "loss": 2.1955, "step": 4875 }, { "epoch": 0.5011815473132641, "grad_norm": 0.061823770403862, "learning_rate": 0.01, "loss": 2.2039, "step": 4878 }, { "epoch": 0.5014897770471591, "grad_norm": 0.05892050638794899, "learning_rate": 0.01, "loss": 2.178, "step": 4881 }, { "epoch": 0.5017980067810541, "grad_norm": 0.04842402786016464, "learning_rate": 0.01, "loss": 2.1948, "step": 4884 }, { "epoch": 0.5021062365149491, "grad_norm": 0.05962050333619118, "learning_rate": 0.01, "loss": 2.1932, "step": 4887 }, { "epoch": 0.5024144662488441, "grad_norm": 0.056295089423656464, "learning_rate": 0.01, "loss": 2.1757, "step": 4890 }, { "epoch": 0.5027226959827391, "grad_norm": 0.07448049634695053, "learning_rate": 0.01, "loss": 2.2181, "step": 4893 }, { "epoch": 0.5030309257166341, "grad_norm": 0.07998815923929214, "learning_rate": 0.01, "loss": 2.1714, "step": 4896 }, { "epoch": 0.5033391554505291, "grad_norm": 0.08058517426252365, "learning_rate": 0.01, "loss": 2.2131, "step": 4899 }, { "epoch": 0.5036473851844241, "grad_norm": 0.07899410277605057, "learning_rate": 0.01, "loss": 2.1727, "step": 4902 }, { "epoch": 0.5039556149183191, "grad_norm": 0.05830831080675125, "learning_rate": 0.01, "loss": 2.176, "step": 4905 }, { "epoch": 0.5042638446522141, "grad_norm": 0.05831579118967056, "learning_rate": 0.01, "loss": 2.1754, "step": 4908 }, { "epoch": 0.5045720743861091, "grad_norm": 0.052614904940128326, "learning_rate": 0.01, "loss": 2.1935, "step": 4911 }, { "epoch": 0.5048803041200041, "grad_norm": 0.0830332413315773, "learning_rate": 0.01, "loss": 2.2274, "step": 4914 }, { "epoch": 0.5051885338538991, "grad_norm": 0.1138230562210083, "learning_rate": 0.01, "loss": 2.1976, "step": 4917 }, { "epoch": 0.505496763587794, "grad_norm": 0.07024016976356506, "learning_rate": 0.01, "loss": 2.1969, "step": 4920 }, { "epoch": 0.505804993321689, "grad_norm": 0.07235170155763626, "learning_rate": 0.01, "loss": 2.2163, "step": 4923 }, { "epoch": 0.5061132230555841, "grad_norm": 0.06894835084676743, "learning_rate": 0.01, "loss": 2.2232, "step": 4926 }, { "epoch": 0.5064214527894791, "grad_norm": 0.0825890600681305, "learning_rate": 0.01, "loss": 2.1896, "step": 4929 }, { "epoch": 0.5067296825233741, "grad_norm": 0.05901159718632698, "learning_rate": 0.01, "loss": 2.1988, "step": 4932 }, { "epoch": 0.5070379122572691, "grad_norm": 0.048157334327697754, "learning_rate": 0.01, "loss": 2.1904, "step": 4935 }, { "epoch": 0.5073461419911641, "grad_norm": 0.10036749392747879, "learning_rate": 0.01, "loss": 2.1977, "step": 4938 }, { "epoch": 0.5076543717250591, "grad_norm": 0.10984963923692703, "learning_rate": 0.01, "loss": 2.1957, "step": 4941 }, { "epoch": 0.5079626014589541, "grad_norm": 0.09587367624044418, "learning_rate": 0.01, "loss": 2.2, "step": 4944 }, { "epoch": 0.5082708311928491, "grad_norm": 0.06347552686929703, "learning_rate": 0.01, "loss": 2.1918, "step": 4947 }, { "epoch": 0.5085790609267441, "grad_norm": 0.0658629834651947, "learning_rate": 0.01, "loss": 2.1945, "step": 4950 }, { "epoch": 0.5088872906606391, "grad_norm": 0.045971643179655075, "learning_rate": 0.01, "loss": 2.2114, "step": 4953 }, { "epoch": 0.5091955203945341, "grad_norm": 0.04058291018009186, "learning_rate": 0.01, "loss": 2.2066, "step": 4956 }, { "epoch": 0.5095037501284291, "grad_norm": 0.052851296961307526, "learning_rate": 0.01, "loss": 2.1884, "step": 4959 }, { "epoch": 0.5098119798623241, "grad_norm": 0.033158350735902786, "learning_rate": 0.01, "loss": 2.2078, "step": 4962 }, { "epoch": 0.5101202095962191, "grad_norm": 0.05409036949276924, "learning_rate": 0.01, "loss": 2.181, "step": 4965 }, { "epoch": 0.5104284393301141, "grad_norm": 0.0731736570596695, "learning_rate": 0.01, "loss": 2.1825, "step": 4968 }, { "epoch": 0.510736669064009, "grad_norm": 0.05854470282793045, "learning_rate": 0.01, "loss": 2.2119, "step": 4971 }, { "epoch": 0.511044898797904, "grad_norm": 0.05071520060300827, "learning_rate": 0.01, "loss": 2.1886, "step": 4974 }, { "epoch": 0.511353128531799, "grad_norm": 0.060792725533246994, "learning_rate": 0.01, "loss": 2.2066, "step": 4977 }, { "epoch": 0.511661358265694, "grad_norm": 0.0910191684961319, "learning_rate": 0.01, "loss": 2.209, "step": 4980 }, { "epoch": 0.511969587999589, "grad_norm": 0.12366749346256256, "learning_rate": 0.01, "loss": 2.2263, "step": 4983 }, { "epoch": 0.512277817733484, "grad_norm": 0.11254429817199707, "learning_rate": 0.01, "loss": 2.175, "step": 4986 }, { "epoch": 0.512586047467379, "grad_norm": 0.11091643571853638, "learning_rate": 0.01, "loss": 2.2046, "step": 4989 }, { "epoch": 0.512894277201274, "grad_norm": 0.045611754059791565, "learning_rate": 0.01, "loss": 2.1861, "step": 4992 }, { "epoch": 0.513202506935169, "grad_norm": 0.09836157411336899, "learning_rate": 0.01, "loss": 2.1931, "step": 4995 }, { "epoch": 0.513510736669064, "grad_norm": 0.11932815611362457, "learning_rate": 0.01, "loss": 2.2087, "step": 4998 }, { "epoch": 0.513818966402959, "grad_norm": 0.10955359041690826, "learning_rate": 0.01, "loss": 2.19, "step": 5001 }, { "epoch": 0.514127196136854, "grad_norm": 0.09804633259773254, "learning_rate": 0.01, "loss": 2.1572, "step": 5004 }, { "epoch": 0.514435425870749, "grad_norm": 0.04835839942097664, "learning_rate": 0.01, "loss": 2.2115, "step": 5007 }, { "epoch": 0.514743655604644, "grad_norm": 0.04645110294222832, "learning_rate": 0.01, "loss": 2.2012, "step": 5010 }, { "epoch": 0.515051885338539, "grad_norm": 0.05947386845946312, "learning_rate": 0.01, "loss": 2.2039, "step": 5013 }, { "epoch": 0.515360115072434, "grad_norm": 0.05693971738219261, "learning_rate": 0.01, "loss": 2.1733, "step": 5016 }, { "epoch": 0.515668344806329, "grad_norm": 0.0724320039153099, "learning_rate": 0.01, "loss": 2.1944, "step": 5019 }, { "epoch": 0.5159765745402239, "grad_norm": 0.06627337634563446, "learning_rate": 0.01, "loss": 2.1932, "step": 5022 }, { "epoch": 0.5162848042741189, "grad_norm": 0.10879958420991898, "learning_rate": 0.01, "loss": 2.2024, "step": 5025 }, { "epoch": 0.5165930340080139, "grad_norm": 0.12266898900270462, "learning_rate": 0.01, "loss": 2.1938, "step": 5028 }, { "epoch": 0.5169012637419089, "grad_norm": 0.06240540370345116, "learning_rate": 0.01, "loss": 2.18, "step": 5031 }, { "epoch": 0.5172094934758039, "grad_norm": 0.05043266713619232, "learning_rate": 0.01, "loss": 2.1936, "step": 5034 }, { "epoch": 0.5175177232096989, "grad_norm": 0.052652738988399506, "learning_rate": 0.01, "loss": 2.1631, "step": 5037 }, { "epoch": 0.5178259529435939, "grad_norm": 0.04598904401063919, "learning_rate": 0.01, "loss": 2.2067, "step": 5040 }, { "epoch": 0.518134182677489, "grad_norm": 0.07040087133646011, "learning_rate": 0.01, "loss": 2.1737, "step": 5043 }, { "epoch": 0.518442412411384, "grad_norm": 0.04827702417969704, "learning_rate": 0.01, "loss": 2.2128, "step": 5046 }, { "epoch": 0.518750642145279, "grad_norm": 0.09803622215986252, "learning_rate": 0.01, "loss": 2.1851, "step": 5049 }, { "epoch": 0.519058871879174, "grad_norm": 0.1019926443696022, "learning_rate": 0.01, "loss": 2.181, "step": 5052 }, { "epoch": 0.519367101613069, "grad_norm": 0.08847504109144211, "learning_rate": 0.01, "loss": 2.1874, "step": 5055 }, { "epoch": 0.519675331346964, "grad_norm": 0.06151921674609184, "learning_rate": 0.01, "loss": 2.1785, "step": 5058 }, { "epoch": 0.519983561080859, "grad_norm": 0.04823022335767746, "learning_rate": 0.01, "loss": 2.1662, "step": 5061 }, { "epoch": 0.520291790814754, "grad_norm": 0.12454935908317566, "learning_rate": 0.01, "loss": 2.1864, "step": 5064 }, { "epoch": 0.520600020548649, "grad_norm": 0.0716002956032753, "learning_rate": 0.01, "loss": 2.1866, "step": 5067 }, { "epoch": 0.520908250282544, "grad_norm": 0.055079616606235504, "learning_rate": 0.01, "loss": 2.2137, "step": 5070 }, { "epoch": 0.5212164800164389, "grad_norm": 0.05969909206032753, "learning_rate": 0.01, "loss": 2.1972, "step": 5073 }, { "epoch": 0.5215247097503339, "grad_norm": 0.07373122125864029, "learning_rate": 0.01, "loss": 2.2312, "step": 5076 }, { "epoch": 0.5218329394842289, "grad_norm": 0.1899929642677307, "learning_rate": 0.01, "loss": 2.2141, "step": 5079 }, { "epoch": 0.5221411692181239, "grad_norm": 0.05221979692578316, "learning_rate": 0.01, "loss": 2.1899, "step": 5082 }, { "epoch": 0.5224493989520189, "grad_norm": 0.04537337273359299, "learning_rate": 0.01, "loss": 2.1571, "step": 5085 }, { "epoch": 0.5227576286859139, "grad_norm": 0.05490431934595108, "learning_rate": 0.01, "loss": 2.1705, "step": 5088 }, { "epoch": 0.5230658584198089, "grad_norm": 0.03813198208808899, "learning_rate": 0.01, "loss": 2.1773, "step": 5091 }, { "epoch": 0.5233740881537039, "grad_norm": 0.045411352068185806, "learning_rate": 0.01, "loss": 2.2065, "step": 5094 }, { "epoch": 0.5236823178875989, "grad_norm": 0.05433456227183342, "learning_rate": 0.01, "loss": 2.1901, "step": 5097 }, { "epoch": 0.5239905476214939, "grad_norm": 0.10771681368350983, "learning_rate": 0.01, "loss": 2.171, "step": 5100 }, { "epoch": 0.5242987773553889, "grad_norm": 0.06446761637926102, "learning_rate": 0.01, "loss": 2.2033, "step": 5103 }, { "epoch": 0.5246070070892839, "grad_norm": 0.06428392231464386, "learning_rate": 0.01, "loss": 2.1877, "step": 5106 }, { "epoch": 0.5249152368231789, "grad_norm": 0.0525304451584816, "learning_rate": 0.01, "loss": 2.206, "step": 5109 }, { "epoch": 0.5252234665570739, "grad_norm": 0.07332491129636765, "learning_rate": 0.01, "loss": 2.1992, "step": 5112 }, { "epoch": 0.5255316962909689, "grad_norm": 0.1738174557685852, "learning_rate": 0.01, "loss": 2.176, "step": 5115 }, { "epoch": 0.5258399260248638, "grad_norm": 0.08102334290742874, "learning_rate": 0.01, "loss": 2.2067, "step": 5118 }, { "epoch": 0.5261481557587588, "grad_norm": 0.06945500522851944, "learning_rate": 0.01, "loss": 2.206, "step": 5121 }, { "epoch": 0.5264563854926538, "grad_norm": 0.07017000019550323, "learning_rate": 0.01, "loss": 2.2002, "step": 5124 }, { "epoch": 0.5267646152265488, "grad_norm": 0.03883346915245056, "learning_rate": 0.01, "loss": 2.1608, "step": 5127 }, { "epoch": 0.5270728449604438, "grad_norm": 0.050974566489458084, "learning_rate": 0.01, "loss": 2.1973, "step": 5130 }, { "epoch": 0.5273810746943388, "grad_norm": 0.0665312334895134, "learning_rate": 0.01, "loss": 2.2064, "step": 5133 }, { "epoch": 0.5276893044282338, "grad_norm": 0.03946761414408684, "learning_rate": 0.01, "loss": 2.1794, "step": 5136 }, { "epoch": 0.5279975341621288, "grad_norm": 0.046160902827978134, "learning_rate": 0.01, "loss": 2.1919, "step": 5139 }, { "epoch": 0.5283057638960238, "grad_norm": 0.046186063438653946, "learning_rate": 0.01, "loss": 2.1687, "step": 5142 }, { "epoch": 0.5286139936299188, "grad_norm": 0.07073774188756943, "learning_rate": 0.01, "loss": 2.2154, "step": 5145 }, { "epoch": 0.5289222233638138, "grad_norm": 0.08319617807865143, "learning_rate": 0.01, "loss": 2.2104, "step": 5148 }, { "epoch": 0.5292304530977088, "grad_norm": 0.08551453799009323, "learning_rate": 0.01, "loss": 2.1743, "step": 5151 }, { "epoch": 0.5295386828316038, "grad_norm": 0.06613323837518692, "learning_rate": 0.01, "loss": 2.1774, "step": 5154 }, { "epoch": 0.5298469125654989, "grad_norm": 0.0601351298391819, "learning_rate": 0.01, "loss": 2.1831, "step": 5157 }, { "epoch": 0.5301551422993939, "grad_norm": 0.04963411018252373, "learning_rate": 0.01, "loss": 2.1915, "step": 5160 }, { "epoch": 0.5304633720332889, "grad_norm": 0.06755329668521881, "learning_rate": 0.01, "loss": 2.163, "step": 5163 }, { "epoch": 0.5307716017671839, "grad_norm": 0.04755258187651634, "learning_rate": 0.01, "loss": 2.1518, "step": 5166 }, { "epoch": 0.5310798315010788, "grad_norm": 0.15916316211223602, "learning_rate": 0.01, "loss": 2.1835, "step": 5169 }, { "epoch": 0.5313880612349738, "grad_norm": 0.0807122215628624, "learning_rate": 0.01, "loss": 2.2193, "step": 5172 }, { "epoch": 0.5316962909688688, "grad_norm": 0.05207689106464386, "learning_rate": 0.01, "loss": 2.1754, "step": 5175 }, { "epoch": 0.5320045207027638, "grad_norm": 0.045082803815603256, "learning_rate": 0.01, "loss": 2.1525, "step": 5178 }, { "epoch": 0.5323127504366588, "grad_norm": 0.07747700810432434, "learning_rate": 0.01, "loss": 2.1438, "step": 5181 }, { "epoch": 0.5326209801705538, "grad_norm": 0.13366450369358063, "learning_rate": 0.01, "loss": 2.1904, "step": 5184 }, { "epoch": 0.5329292099044488, "grad_norm": 0.06902889162302017, "learning_rate": 0.01, "loss": 2.1786, "step": 5187 }, { "epoch": 0.5332374396383438, "grad_norm": 0.04604712501168251, "learning_rate": 0.01, "loss": 2.1848, "step": 5190 }, { "epoch": 0.5335456693722388, "grad_norm": 0.08803047984838486, "learning_rate": 0.01, "loss": 2.1798, "step": 5193 }, { "epoch": 0.5338538991061338, "grad_norm": 0.08366485685110092, "learning_rate": 0.01, "loss": 2.2008, "step": 5196 }, { "epoch": 0.5341621288400288, "grad_norm": 0.06176333501935005, "learning_rate": 0.01, "loss": 2.1722, "step": 5199 }, { "epoch": 0.5344703585739238, "grad_norm": 0.0837249681353569, "learning_rate": 0.01, "loss": 2.1783, "step": 5202 }, { "epoch": 0.5347785883078188, "grad_norm": 0.1322035938501358, "learning_rate": 0.01, "loss": 2.1948, "step": 5205 }, { "epoch": 0.5350868180417138, "grad_norm": 0.11094444990158081, "learning_rate": 0.01, "loss": 2.1937, "step": 5208 }, { "epoch": 0.5353950477756088, "grad_norm": 0.05182232707738876, "learning_rate": 0.01, "loss": 2.1945, "step": 5211 }, { "epoch": 0.5357032775095038, "grad_norm": 0.08261944353580475, "learning_rate": 0.01, "loss": 2.1855, "step": 5214 }, { "epoch": 0.5360115072433987, "grad_norm": 0.11097295582294464, "learning_rate": 0.01, "loss": 2.1902, "step": 5217 }, { "epoch": 0.5363197369772937, "grad_norm": 0.05825675278902054, "learning_rate": 0.01, "loss": 2.1984, "step": 5220 }, { "epoch": 0.5366279667111887, "grad_norm": 0.11612821370363235, "learning_rate": 0.01, "loss": 2.1679, "step": 5223 }, { "epoch": 0.5369361964450837, "grad_norm": 0.09120064228773117, "learning_rate": 0.01, "loss": 2.1586, "step": 5226 }, { "epoch": 0.5372444261789787, "grad_norm": 0.055181995034217834, "learning_rate": 0.01, "loss": 2.2169, "step": 5229 }, { "epoch": 0.5375526559128737, "grad_norm": 0.055780068039894104, "learning_rate": 0.01, "loss": 2.1748, "step": 5232 }, { "epoch": 0.5378608856467687, "grad_norm": 0.06303024291992188, "learning_rate": 0.01, "loss": 2.1782, "step": 5235 }, { "epoch": 0.5381691153806637, "grad_norm": 0.10456321388483047, "learning_rate": 0.01, "loss": 2.1975, "step": 5238 }, { "epoch": 0.5384773451145587, "grad_norm": 0.054177962243556976, "learning_rate": 0.01, "loss": 2.1641, "step": 5241 }, { "epoch": 0.5387855748484537, "grad_norm": 0.06265738606452942, "learning_rate": 0.01, "loss": 2.183, "step": 5244 }, { "epoch": 0.5390938045823487, "grad_norm": 0.13720418512821198, "learning_rate": 0.01, "loss": 2.1698, "step": 5247 }, { "epoch": 0.5394020343162437, "grad_norm": 0.04917861148715019, "learning_rate": 0.01, "loss": 2.1692, "step": 5250 }, { "epoch": 0.5397102640501387, "grad_norm": 0.04919945448637009, "learning_rate": 0.01, "loss": 2.1652, "step": 5253 }, { "epoch": 0.5400184937840337, "grad_norm": 0.06462734192609787, "learning_rate": 0.01, "loss": 2.1987, "step": 5256 }, { "epoch": 0.5403267235179287, "grad_norm": 0.05275480076670647, "learning_rate": 0.01, "loss": 2.1955, "step": 5259 }, { "epoch": 0.5406349532518236, "grad_norm": 0.12235717475414276, "learning_rate": 0.01, "loss": 2.1937, "step": 5262 }, { "epoch": 0.5409431829857186, "grad_norm": 0.05300014466047287, "learning_rate": 0.01, "loss": 2.1589, "step": 5265 }, { "epoch": 0.5412514127196136, "grad_norm": 0.0429493710398674, "learning_rate": 0.01, "loss": 2.1618, "step": 5268 }, { "epoch": 0.5415596424535086, "grad_norm": 0.07041259855031967, "learning_rate": 0.01, "loss": 2.1661, "step": 5271 }, { "epoch": 0.5418678721874037, "grad_norm": 0.05304478853940964, "learning_rate": 0.01, "loss": 2.183, "step": 5274 }, { "epoch": 0.5421761019212987, "grad_norm": 0.12009457498788834, "learning_rate": 0.01, "loss": 2.1862, "step": 5277 }, { "epoch": 0.5424843316551937, "grad_norm": 0.11629784107208252, "learning_rate": 0.01, "loss": 2.1897, "step": 5280 }, { "epoch": 0.5427925613890887, "grad_norm": 0.07305426150560379, "learning_rate": 0.01, "loss": 2.1832, "step": 5283 }, { "epoch": 0.5431007911229837, "grad_norm": 0.0855623185634613, "learning_rate": 0.01, "loss": 2.1884, "step": 5286 }, { "epoch": 0.5434090208568787, "grad_norm": 0.04178578779101372, "learning_rate": 0.01, "loss": 2.1872, "step": 5289 }, { "epoch": 0.5437172505907737, "grad_norm": 0.05382310971617699, "learning_rate": 0.01, "loss": 2.1901, "step": 5292 }, { "epoch": 0.5440254803246687, "grad_norm": 0.10682760924100876, "learning_rate": 0.01, "loss": 2.1957, "step": 5295 }, { "epoch": 0.5443337100585637, "grad_norm": 0.15037471055984497, "learning_rate": 0.01, "loss": 2.2085, "step": 5298 }, { "epoch": 0.5446419397924587, "grad_norm": 0.08333491533994675, "learning_rate": 0.01, "loss": 2.1964, "step": 5301 }, { "epoch": 0.5449501695263537, "grad_norm": 0.08964785933494568, "learning_rate": 0.01, "loss": 2.1613, "step": 5304 }, { "epoch": 0.5452583992602487, "grad_norm": 0.06194687634706497, "learning_rate": 0.01, "loss": 2.1711, "step": 5307 }, { "epoch": 0.5455666289941437, "grad_norm": 0.047254305332899094, "learning_rate": 0.01, "loss": 2.1956, "step": 5310 }, { "epoch": 0.5458748587280386, "grad_norm": 0.052926719188690186, "learning_rate": 0.01, "loss": 2.1767, "step": 5313 }, { "epoch": 0.5461830884619336, "grad_norm": 0.08765383809804916, "learning_rate": 0.01, "loss": 2.1782, "step": 5316 }, { "epoch": 0.5464913181958286, "grad_norm": 0.0749160572886467, "learning_rate": 0.01, "loss": 2.1875, "step": 5319 }, { "epoch": 0.5467995479297236, "grad_norm": 0.09781020879745483, "learning_rate": 0.01, "loss": 2.1748, "step": 5322 }, { "epoch": 0.5471077776636186, "grad_norm": 0.04605260491371155, "learning_rate": 0.01, "loss": 2.145, "step": 5325 }, { "epoch": 0.5474160073975136, "grad_norm": 0.13507331907749176, "learning_rate": 0.01, "loss": 2.1769, "step": 5328 }, { "epoch": 0.5477242371314086, "grad_norm": 0.05028709024190903, "learning_rate": 0.01, "loss": 2.1925, "step": 5331 }, { "epoch": 0.5480324668653036, "grad_norm": 0.08754327893257141, "learning_rate": 0.01, "loss": 2.159, "step": 5334 }, { "epoch": 0.5483406965991986, "grad_norm": 0.10449190437793732, "learning_rate": 0.01, "loss": 2.1898, "step": 5337 }, { "epoch": 0.5486489263330936, "grad_norm": 0.10263057053089142, "learning_rate": 0.01, "loss": 2.1776, "step": 5340 }, { "epoch": 0.5489571560669886, "grad_norm": 0.0547097772359848, "learning_rate": 0.01, "loss": 2.1941, "step": 5343 }, { "epoch": 0.5492653858008836, "grad_norm": 0.06682941317558289, "learning_rate": 0.01, "loss": 2.1901, "step": 5346 }, { "epoch": 0.5495736155347786, "grad_norm": 0.06421027332544327, "learning_rate": 0.01, "loss": 2.1981, "step": 5349 }, { "epoch": 0.5498818452686736, "grad_norm": 0.041218411177396774, "learning_rate": 0.01, "loss": 2.1844, "step": 5352 }, { "epoch": 0.5501900750025686, "grad_norm": 0.042902372777462006, "learning_rate": 0.01, "loss": 2.1981, "step": 5355 }, { "epoch": 0.5504983047364636, "grad_norm": 0.05338321253657341, "learning_rate": 0.01, "loss": 2.168, "step": 5358 }, { "epoch": 0.5508065344703585, "grad_norm": 0.06692427396774292, "learning_rate": 0.01, "loss": 2.1891, "step": 5361 }, { "epoch": 0.5511147642042535, "grad_norm": 0.07927200943231583, "learning_rate": 0.01, "loss": 2.1853, "step": 5364 }, { "epoch": 0.5514229939381485, "grad_norm": 0.05655739828944206, "learning_rate": 0.01, "loss": 2.1838, "step": 5367 }, { "epoch": 0.5517312236720435, "grad_norm": 0.04488144442439079, "learning_rate": 0.01, "loss": 2.1754, "step": 5370 }, { "epoch": 0.5520394534059385, "grad_norm": 0.09253795444965363, "learning_rate": 0.01, "loss": 2.1742, "step": 5373 }, { "epoch": 0.5523476831398335, "grad_norm": 0.07396019250154495, "learning_rate": 0.01, "loss": 2.1582, "step": 5376 }, { "epoch": 0.5526559128737285, "grad_norm": 0.053663600236177444, "learning_rate": 0.01, "loss": 2.1508, "step": 5379 }, { "epoch": 0.5529641426076235, "grad_norm": 0.062076181173324585, "learning_rate": 0.01, "loss": 2.1772, "step": 5382 }, { "epoch": 0.5532723723415185, "grad_norm": 0.08481581509113312, "learning_rate": 0.01, "loss": 2.1836, "step": 5385 }, { "epoch": 0.5535806020754136, "grad_norm": 0.08981155604124069, "learning_rate": 0.01, "loss": 2.204, "step": 5388 }, { "epoch": 0.5538888318093086, "grad_norm": 0.10067261755466461, "learning_rate": 0.01, "loss": 2.1527, "step": 5391 }, { "epoch": 0.5541970615432036, "grad_norm": 0.06231047958135605, "learning_rate": 0.01, "loss": 2.194, "step": 5394 }, { "epoch": 0.5545052912770986, "grad_norm": 0.049111973494291306, "learning_rate": 0.01, "loss": 2.1889, "step": 5397 }, { "epoch": 0.5548135210109936, "grad_norm": 0.06446948647499084, "learning_rate": 0.01, "loss": 2.2103, "step": 5400 }, { "epoch": 0.5551217507448886, "grad_norm": 0.051946625113487244, "learning_rate": 0.01, "loss": 2.1977, "step": 5403 }, { "epoch": 0.5554299804787836, "grad_norm": 0.1369265466928482, "learning_rate": 0.01, "loss": 2.1771, "step": 5406 }, { "epoch": 0.5557382102126786, "grad_norm": 0.08489779382944107, "learning_rate": 0.01, "loss": 2.1782, "step": 5409 }, { "epoch": 0.5560464399465735, "grad_norm": 0.10673670470714569, "learning_rate": 0.01, "loss": 2.173, "step": 5412 }, { "epoch": 0.5563546696804685, "grad_norm": 0.055250637233257294, "learning_rate": 0.01, "loss": 2.1539, "step": 5415 }, { "epoch": 0.5566628994143635, "grad_norm": 0.05136672407388687, "learning_rate": 0.01, "loss": 2.2035, "step": 5418 }, { "epoch": 0.5569711291482585, "grad_norm": 0.040590591728687286, "learning_rate": 0.01, "loss": 2.1778, "step": 5421 }, { "epoch": 0.5572793588821535, "grad_norm": 0.048333633691072464, "learning_rate": 0.01, "loss": 2.191, "step": 5424 }, { "epoch": 0.5575875886160485, "grad_norm": 0.0582728311419487, "learning_rate": 0.01, "loss": 2.1734, "step": 5427 }, { "epoch": 0.5578958183499435, "grad_norm": 0.05272262915968895, "learning_rate": 0.01, "loss": 2.1714, "step": 5430 }, { "epoch": 0.5582040480838385, "grad_norm": 0.08472342789173126, "learning_rate": 0.01, "loss": 2.1624, "step": 5433 }, { "epoch": 0.5585122778177335, "grad_norm": 0.10869960486888885, "learning_rate": 0.01, "loss": 2.164, "step": 5436 }, { "epoch": 0.5588205075516285, "grad_norm": 0.0569114163517952, "learning_rate": 0.01, "loss": 2.1933, "step": 5439 }, { "epoch": 0.5591287372855235, "grad_norm": 0.14485467970371246, "learning_rate": 0.01, "loss": 2.1779, "step": 5442 }, { "epoch": 0.5594369670194185, "grad_norm": 0.08184878528118134, "learning_rate": 0.01, "loss": 2.1779, "step": 5445 }, { "epoch": 0.5597451967533135, "grad_norm": 0.06575775146484375, "learning_rate": 0.01, "loss": 2.136, "step": 5448 }, { "epoch": 0.5600534264872085, "grad_norm": 0.08628299832344055, "learning_rate": 0.01, "loss": 2.1696, "step": 5451 }, { "epoch": 0.5603616562211035, "grad_norm": 0.06078352406620979, "learning_rate": 0.01, "loss": 2.1865, "step": 5454 }, { "epoch": 0.5606698859549984, "grad_norm": 0.05207353085279465, "learning_rate": 0.01, "loss": 2.167, "step": 5457 }, { "epoch": 0.5609781156888934, "grad_norm": 0.059535857290029526, "learning_rate": 0.01, "loss": 2.1977, "step": 5460 }, { "epoch": 0.5612863454227884, "grad_norm": 0.05342729389667511, "learning_rate": 0.01, "loss": 2.1823, "step": 5463 }, { "epoch": 0.5615945751566834, "grad_norm": 0.04207632318139076, "learning_rate": 0.01, "loss": 2.1849, "step": 5466 }, { "epoch": 0.5619028048905784, "grad_norm": 0.1334255486726761, "learning_rate": 0.01, "loss": 2.1886, "step": 5469 }, { "epoch": 0.5622110346244734, "grad_norm": 0.06532323360443115, "learning_rate": 0.01, "loss": 2.1979, "step": 5472 }, { "epoch": 0.5625192643583684, "grad_norm": 0.0793483555316925, "learning_rate": 0.01, "loss": 2.188, "step": 5475 }, { "epoch": 0.5628274940922634, "grad_norm": 0.04637480154633522, "learning_rate": 0.01, "loss": 2.1562, "step": 5478 }, { "epoch": 0.5631357238261584, "grad_norm": 0.0482000894844532, "learning_rate": 0.01, "loss": 2.1587, "step": 5481 }, { "epoch": 0.5634439535600534, "grad_norm": 0.06253401190042496, "learning_rate": 0.01, "loss": 2.1978, "step": 5484 }, { "epoch": 0.5637521832939484, "grad_norm": 0.15622715651988983, "learning_rate": 0.01, "loss": 2.171, "step": 5487 }, { "epoch": 0.5640604130278434, "grad_norm": 0.10941077768802643, "learning_rate": 0.01, "loss": 2.1952, "step": 5490 }, { "epoch": 0.5643686427617384, "grad_norm": 0.08030713349580765, "learning_rate": 0.01, "loss": 2.1948, "step": 5493 }, { "epoch": 0.5646768724956334, "grad_norm": 0.13679014146327972, "learning_rate": 0.01, "loss": 2.1767, "step": 5496 }, { "epoch": 0.5649851022295284, "grad_norm": 0.04662426933646202, "learning_rate": 0.01, "loss": 2.1926, "step": 5499 }, { "epoch": 0.5652933319634234, "grad_norm": 0.05347858741879463, "learning_rate": 0.01, "loss": 2.1825, "step": 5502 }, { "epoch": 0.5656015616973185, "grad_norm": 0.06205238029360771, "learning_rate": 0.01, "loss": 2.1537, "step": 5505 }, { "epoch": 0.5659097914312134, "grad_norm": 0.05525955557823181, "learning_rate": 0.01, "loss": 2.1802, "step": 5508 }, { "epoch": 0.5662180211651084, "grad_norm": 0.055693045258522034, "learning_rate": 0.01, "loss": 2.1727, "step": 5511 }, { "epoch": 0.5665262508990034, "grad_norm": 0.051134396344423294, "learning_rate": 0.01, "loss": 2.1695, "step": 5514 }, { "epoch": 0.5668344806328984, "grad_norm": 0.05469521880149841, "learning_rate": 0.01, "loss": 2.1664, "step": 5517 }, { "epoch": 0.5671427103667934, "grad_norm": 0.039416272193193436, "learning_rate": 0.01, "loss": 2.1708, "step": 5520 }, { "epoch": 0.5674509401006884, "grad_norm": 0.10661659389734268, "learning_rate": 0.01, "loss": 2.1753, "step": 5523 }, { "epoch": 0.5677591698345834, "grad_norm": 0.07567829638719559, "learning_rate": 0.01, "loss": 2.1733, "step": 5526 }, { "epoch": 0.5680673995684784, "grad_norm": 0.06030309572815895, "learning_rate": 0.01, "loss": 2.1795, "step": 5529 }, { "epoch": 0.5683756293023734, "grad_norm": 0.07429811358451843, "learning_rate": 0.01, "loss": 2.1936, "step": 5532 }, { "epoch": 0.5686838590362684, "grad_norm": 0.08618849515914917, "learning_rate": 0.01, "loss": 2.2009, "step": 5535 }, { "epoch": 0.5689920887701634, "grad_norm": 0.04969833791255951, "learning_rate": 0.01, "loss": 2.1711, "step": 5538 }, { "epoch": 0.5693003185040584, "grad_norm": 0.11154712736606598, "learning_rate": 0.01, "loss": 2.1802, "step": 5541 }, { "epoch": 0.5696085482379534, "grad_norm": 0.07754155993461609, "learning_rate": 0.01, "loss": 2.164, "step": 5544 }, { "epoch": 0.5699167779718484, "grad_norm": 0.04600273445248604, "learning_rate": 0.01, "loss": 2.1918, "step": 5547 }, { "epoch": 0.5702250077057434, "grad_norm": 0.06788063049316406, "learning_rate": 0.01, "loss": 2.1477, "step": 5550 }, { "epoch": 0.5705332374396384, "grad_norm": 0.11349419504404068, "learning_rate": 0.01, "loss": 2.1603, "step": 5553 }, { "epoch": 0.5708414671735333, "grad_norm": 0.11178430914878845, "learning_rate": 0.01, "loss": 2.1439, "step": 5556 }, { "epoch": 0.5711496969074283, "grad_norm": 0.050257500261068344, "learning_rate": 0.01, "loss": 2.1851, "step": 5559 }, { "epoch": 0.5714579266413233, "grad_norm": 0.08327756822109222, "learning_rate": 0.01, "loss": 2.158, "step": 5562 }, { "epoch": 0.5717661563752183, "grad_norm": 0.06866388767957687, "learning_rate": 0.01, "loss": 2.1584, "step": 5565 }, { "epoch": 0.5720743861091133, "grad_norm": 0.1139674037694931, "learning_rate": 0.01, "loss": 2.1897, "step": 5568 }, { "epoch": 0.5723826158430083, "grad_norm": 0.07029612362384796, "learning_rate": 0.01, "loss": 2.1723, "step": 5571 }, { "epoch": 0.5726908455769033, "grad_norm": 0.10171212255954742, "learning_rate": 0.01, "loss": 2.1788, "step": 5574 }, { "epoch": 0.5729990753107983, "grad_norm": 0.11364202946424484, "learning_rate": 0.01, "loss": 2.1659, "step": 5577 }, { "epoch": 0.5733073050446933, "grad_norm": 0.08066857606172562, "learning_rate": 0.01, "loss": 2.1902, "step": 5580 }, { "epoch": 0.5736155347785883, "grad_norm": 0.09207342565059662, "learning_rate": 0.01, "loss": 2.1519, "step": 5583 }, { "epoch": 0.5739237645124833, "grad_norm": 0.06945987790822983, "learning_rate": 0.01, "loss": 2.1677, "step": 5586 }, { "epoch": 0.5742319942463783, "grad_norm": 0.05137445777654648, "learning_rate": 0.01, "loss": 2.1686, "step": 5589 }, { "epoch": 0.5745402239802733, "grad_norm": 0.10192268341779709, "learning_rate": 0.01, "loss": 2.1758, "step": 5592 }, { "epoch": 0.5748484537141683, "grad_norm": 0.056787896901369095, "learning_rate": 0.01, "loss": 2.1642, "step": 5595 }, { "epoch": 0.5751566834480633, "grad_norm": 0.07727455347776413, "learning_rate": 0.01, "loss": 2.1662, "step": 5598 }, { "epoch": 0.5754649131819582, "grad_norm": 0.1311456710100174, "learning_rate": 0.01, "loss": 2.1713, "step": 5601 }, { "epoch": 0.5757731429158532, "grad_norm": 0.1014258936047554, "learning_rate": 0.01, "loss": 2.1751, "step": 5604 }, { "epoch": 0.5760813726497482, "grad_norm": 0.06325560063123703, "learning_rate": 0.01, "loss": 2.1757, "step": 5607 }, { "epoch": 0.5763896023836432, "grad_norm": 0.07262448221445084, "learning_rate": 0.01, "loss": 2.1575, "step": 5610 }, { "epoch": 0.5766978321175382, "grad_norm": 0.07160039991140366, "learning_rate": 0.01, "loss": 2.1706, "step": 5613 }, { "epoch": 0.5770060618514332, "grad_norm": 0.050024017691612244, "learning_rate": 0.01, "loss": 2.1811, "step": 5616 }, { "epoch": 0.5773142915853282, "grad_norm": 0.09685138612985611, "learning_rate": 0.01, "loss": 2.1549, "step": 5619 }, { "epoch": 0.5776225213192233, "grad_norm": 0.058329988270998, "learning_rate": 0.01, "loss": 2.1813, "step": 5622 }, { "epoch": 0.5779307510531183, "grad_norm": 0.06637705117464066, "learning_rate": 0.01, "loss": 2.1717, "step": 5625 }, { "epoch": 0.5782389807870133, "grad_norm": 0.0906175896525383, "learning_rate": 0.01, "loss": 2.1677, "step": 5628 }, { "epoch": 0.5785472105209083, "grad_norm": 0.06751519441604614, "learning_rate": 0.01, "loss": 2.1584, "step": 5631 }, { "epoch": 0.5788554402548033, "grad_norm": 0.04437318444252014, "learning_rate": 0.01, "loss": 2.2013, "step": 5634 }, { "epoch": 0.5791636699886983, "grad_norm": 0.04365368187427521, "learning_rate": 0.01, "loss": 2.1746, "step": 5637 }, { "epoch": 0.5794718997225933, "grad_norm": 0.04844829812645912, "learning_rate": 0.01, "loss": 2.1818, "step": 5640 }, { "epoch": 0.5797801294564883, "grad_norm": 0.04154437035322189, "learning_rate": 0.01, "loss": 2.1536, "step": 5643 }, { "epoch": 0.5800883591903833, "grad_norm": 0.07691098004579544, "learning_rate": 0.01, "loss": 2.1883, "step": 5646 }, { "epoch": 0.5803965889242783, "grad_norm": 0.07065980136394501, "learning_rate": 0.01, "loss": 2.154, "step": 5649 }, { "epoch": 0.5807048186581732, "grad_norm": 0.1051129475235939, "learning_rate": 0.01, "loss": 2.1447, "step": 5652 }, { "epoch": 0.5810130483920682, "grad_norm": 0.10921964794397354, "learning_rate": 0.01, "loss": 2.1777, "step": 5655 }, { "epoch": 0.5813212781259632, "grad_norm": 0.1120898649096489, "learning_rate": 0.01, "loss": 2.1617, "step": 5658 }, { "epoch": 0.5816295078598582, "grad_norm": 0.09593590348958969, "learning_rate": 0.01, "loss": 2.1457, "step": 5661 }, { "epoch": 0.5819377375937532, "grad_norm": 0.054108936339616776, "learning_rate": 0.01, "loss": 2.1793, "step": 5664 }, { "epoch": 0.5822459673276482, "grad_norm": 0.07890141755342484, "learning_rate": 0.01, "loss": 2.1676, "step": 5667 }, { "epoch": 0.5825541970615432, "grad_norm": 0.07864063233137131, "learning_rate": 0.01, "loss": 2.1766, "step": 5670 }, { "epoch": 0.5828624267954382, "grad_norm": 0.08160068094730377, "learning_rate": 0.01, "loss": 2.166, "step": 5673 }, { "epoch": 0.5831706565293332, "grad_norm": 0.08126121759414673, "learning_rate": 0.01, "loss": 2.1691, "step": 5676 }, { "epoch": 0.5834788862632282, "grad_norm": 0.05922897160053253, "learning_rate": 0.01, "loss": 2.1854, "step": 5679 }, { "epoch": 0.5837871159971232, "grad_norm": 0.1024496778845787, "learning_rate": 0.01, "loss": 2.1818, "step": 5682 }, { "epoch": 0.5840953457310182, "grad_norm": 0.08880037069320679, "learning_rate": 0.01, "loss": 2.2054, "step": 5685 }, { "epoch": 0.5844035754649132, "grad_norm": 0.04404953494668007, "learning_rate": 0.01, "loss": 2.1524, "step": 5688 }, { "epoch": 0.5847118051988082, "grad_norm": 0.05817687511444092, "learning_rate": 0.01, "loss": 2.1813, "step": 5691 }, { "epoch": 0.5850200349327032, "grad_norm": 0.047581762075424194, "learning_rate": 0.01, "loss": 2.1545, "step": 5694 }, { "epoch": 0.5853282646665982, "grad_norm": 0.11034911125898361, "learning_rate": 0.01, "loss": 2.1803, "step": 5697 }, { "epoch": 0.5856364944004931, "grad_norm": 0.05118945613503456, "learning_rate": 0.01, "loss": 2.1314, "step": 5700 }, { "epoch": 0.5859447241343881, "grad_norm": 0.048316795378923416, "learning_rate": 0.01, "loss": 2.1711, "step": 5703 }, { "epoch": 0.5862529538682831, "grad_norm": 0.12578584253787994, "learning_rate": 0.01, "loss": 2.1636, "step": 5706 }, { "epoch": 0.5865611836021781, "grad_norm": 0.06594375520944595, "learning_rate": 0.01, "loss": 2.1977, "step": 5709 }, { "epoch": 0.5868694133360731, "grad_norm": 0.060622964054346085, "learning_rate": 0.01, "loss": 2.1408, "step": 5712 }, { "epoch": 0.5871776430699681, "grad_norm": 0.10055366903543472, "learning_rate": 0.01, "loss": 2.1999, "step": 5715 }, { "epoch": 0.5874858728038631, "grad_norm": 0.10235504060983658, "learning_rate": 0.01, "loss": 2.1337, "step": 5718 }, { "epoch": 0.5877941025377581, "grad_norm": 0.07707791030406952, "learning_rate": 0.01, "loss": 2.1387, "step": 5721 }, { "epoch": 0.5881023322716531, "grad_norm": 0.05508594587445259, "learning_rate": 0.01, "loss": 2.1494, "step": 5724 }, { "epoch": 0.5884105620055481, "grad_norm": 0.06580860912799835, "learning_rate": 0.01, "loss": 2.1598, "step": 5727 }, { "epoch": 0.5887187917394431, "grad_norm": 0.07102775573730469, "learning_rate": 0.01, "loss": 2.1618, "step": 5730 }, { "epoch": 0.5890270214733381, "grad_norm": 0.06750554591417313, "learning_rate": 0.01, "loss": 2.1782, "step": 5733 }, { "epoch": 0.5893352512072332, "grad_norm": 0.07100195437669754, "learning_rate": 0.01, "loss": 2.1456, "step": 5736 }, { "epoch": 0.5896434809411282, "grad_norm": 0.10585575550794601, "learning_rate": 0.01, "loss": 2.1751, "step": 5739 }, { "epoch": 0.5899517106750232, "grad_norm": 0.055082373321056366, "learning_rate": 0.01, "loss": 2.1808, "step": 5742 }, { "epoch": 0.5902599404089182, "grad_norm": 0.06285014003515244, "learning_rate": 0.01, "loss": 2.1588, "step": 5745 }, { "epoch": 0.5905681701428132, "grad_norm": 0.13328112661838531, "learning_rate": 0.01, "loss": 2.177, "step": 5748 }, { "epoch": 0.5908763998767081, "grad_norm": 0.08568006008863449, "learning_rate": 0.01, "loss": 2.1559, "step": 5751 }, { "epoch": 0.5911846296106031, "grad_norm": 0.07850711792707443, "learning_rate": 0.01, "loss": 2.2047, "step": 5754 }, { "epoch": 0.5914928593444981, "grad_norm": 0.07706760615110397, "learning_rate": 0.01, "loss": 2.1602, "step": 5757 }, { "epoch": 0.5918010890783931, "grad_norm": 0.07512292265892029, "learning_rate": 0.01, "loss": 2.1871, "step": 5760 }, { "epoch": 0.5921093188122881, "grad_norm": 0.059620197862386703, "learning_rate": 0.01, "loss": 2.1484, "step": 5763 }, { "epoch": 0.5924175485461831, "grad_norm": 0.04021789878606796, "learning_rate": 0.01, "loss": 2.1651, "step": 5766 }, { "epoch": 0.5927257782800781, "grad_norm": 0.050683967769145966, "learning_rate": 0.01, "loss": 2.1693, "step": 5769 }, { "epoch": 0.5930340080139731, "grad_norm": 0.07091210782527924, "learning_rate": 0.01, "loss": 2.1851, "step": 5772 }, { "epoch": 0.5933422377478681, "grad_norm": 0.09877889603376389, "learning_rate": 0.01, "loss": 2.1642, "step": 5775 }, { "epoch": 0.5936504674817631, "grad_norm": 0.08692251145839691, "learning_rate": 0.01, "loss": 2.1755, "step": 5778 }, { "epoch": 0.5939586972156581, "grad_norm": 0.06255677342414856, "learning_rate": 0.01, "loss": 2.1634, "step": 5781 }, { "epoch": 0.5942669269495531, "grad_norm": 0.05615478754043579, "learning_rate": 0.01, "loss": 2.1909, "step": 5784 }, { "epoch": 0.5945751566834481, "grad_norm": 0.04576956480741501, "learning_rate": 0.01, "loss": 2.1519, "step": 5787 }, { "epoch": 0.5948833864173431, "grad_norm": 0.044911667704582214, "learning_rate": 0.01, "loss": 2.1697, "step": 5790 }, { "epoch": 0.5951916161512381, "grad_norm": 0.07787128537893295, "learning_rate": 0.01, "loss": 2.1611, "step": 5793 }, { "epoch": 0.595499845885133, "grad_norm": 0.06199866533279419, "learning_rate": 0.01, "loss": 2.1576, "step": 5796 }, { "epoch": 0.595808075619028, "grad_norm": 0.07048948854207993, "learning_rate": 0.01, "loss": 2.1721, "step": 5799 }, { "epoch": 0.596116305352923, "grad_norm": 0.1173306256532669, "learning_rate": 0.01, "loss": 2.1573, "step": 5802 }, { "epoch": 0.596424535086818, "grad_norm": 0.06866045296192169, "learning_rate": 0.01, "loss": 2.1606, "step": 5805 }, { "epoch": 0.596732764820713, "grad_norm": 0.06821485608816147, "learning_rate": 0.01, "loss": 2.1842, "step": 5808 }, { "epoch": 0.597040994554608, "grad_norm": 0.09566816687583923, "learning_rate": 0.01, "loss": 2.1569, "step": 5811 }, { "epoch": 0.597349224288503, "grad_norm": 0.1130233108997345, "learning_rate": 0.01, "loss": 2.1649, "step": 5814 }, { "epoch": 0.597657454022398, "grad_norm": 0.07310149073600769, "learning_rate": 0.01, "loss": 2.1798, "step": 5817 }, { "epoch": 0.597965683756293, "grad_norm": 0.04523763060569763, "learning_rate": 0.01, "loss": 2.1515, "step": 5820 }, { "epoch": 0.598273913490188, "grad_norm": 0.05843660235404968, "learning_rate": 0.01, "loss": 2.1403, "step": 5823 }, { "epoch": 0.598582143224083, "grad_norm": 0.03981595113873482, "learning_rate": 0.01, "loss": 2.1598, "step": 5826 }, { "epoch": 0.598890372957978, "grad_norm": 0.057108644396066666, "learning_rate": 0.01, "loss": 2.1619, "step": 5829 }, { "epoch": 0.599198602691873, "grad_norm": 0.12298591434955597, "learning_rate": 0.01, "loss": 2.125, "step": 5832 }, { "epoch": 0.599506832425768, "grad_norm": 0.06120186299085617, "learning_rate": 0.01, "loss": 2.181, "step": 5835 }, { "epoch": 0.599815062159663, "grad_norm": 0.05780164897441864, "learning_rate": 0.01, "loss": 2.1555, "step": 5838 }, { "epoch": 0.600123291893558, "grad_norm": 0.0962534248828888, "learning_rate": 0.01, "loss": 2.1626, "step": 5841 }, { "epoch": 0.600431521627453, "grad_norm": 0.07417720556259155, "learning_rate": 0.01, "loss": 2.1996, "step": 5844 }, { "epoch": 0.6007397513613479, "grad_norm": 0.08221522718667984, "learning_rate": 0.01, "loss": 2.1562, "step": 5847 }, { "epoch": 0.6010479810952429, "grad_norm": 0.11511900275945663, "learning_rate": 0.01, "loss": 2.196, "step": 5850 }, { "epoch": 0.601356210829138, "grad_norm": 0.1331305354833603, "learning_rate": 0.01, "loss": 2.1649, "step": 5853 }, { "epoch": 0.601664440563033, "grad_norm": 0.07239941507577896, "learning_rate": 0.01, "loss": 2.1695, "step": 5856 }, { "epoch": 0.601972670296928, "grad_norm": 0.05865192040801048, "learning_rate": 0.01, "loss": 2.1808, "step": 5859 }, { "epoch": 0.602280900030823, "grad_norm": 0.047268107533454895, "learning_rate": 0.01, "loss": 2.176, "step": 5862 }, { "epoch": 0.602589129764718, "grad_norm": 0.046770863234996796, "learning_rate": 0.01, "loss": 2.1689, "step": 5865 }, { "epoch": 0.602897359498613, "grad_norm": 0.04817832633852959, "learning_rate": 0.01, "loss": 2.1566, "step": 5868 }, { "epoch": 0.603205589232508, "grad_norm": 0.05692889541387558, "learning_rate": 0.01, "loss": 2.1564, "step": 5871 }, { "epoch": 0.603513818966403, "grad_norm": 0.056694116443395615, "learning_rate": 0.01, "loss": 2.1591, "step": 5874 }, { "epoch": 0.603822048700298, "grad_norm": 0.08296339213848114, "learning_rate": 0.01, "loss": 2.1695, "step": 5877 }, { "epoch": 0.604130278434193, "grad_norm": 0.0934629738330841, "learning_rate": 0.01, "loss": 2.1472, "step": 5880 }, { "epoch": 0.604438508168088, "grad_norm": 0.10192359238862991, "learning_rate": 0.01, "loss": 2.1441, "step": 5883 }, { "epoch": 0.604746737901983, "grad_norm": 0.04818946123123169, "learning_rate": 0.01, "loss": 2.1747, "step": 5886 }, { "epoch": 0.605054967635878, "grad_norm": 0.10131523758172989, "learning_rate": 0.01, "loss": 2.1546, "step": 5889 }, { "epoch": 0.605363197369773, "grad_norm": 0.07115977257490158, "learning_rate": 0.01, "loss": 2.1597, "step": 5892 }, { "epoch": 0.605671427103668, "grad_norm": 0.03929082304239273, "learning_rate": 0.01, "loss": 2.171, "step": 5895 }, { "epoch": 0.6059796568375629, "grad_norm": 0.04109720513224602, "learning_rate": 0.01, "loss": 2.134, "step": 5898 }, { "epoch": 0.6062878865714579, "grad_norm": 0.05026080831885338, "learning_rate": 0.01, "loss": 2.1491, "step": 5901 }, { "epoch": 0.6065961163053529, "grad_norm": 0.08281126618385315, "learning_rate": 0.01, "loss": 2.1732, "step": 5904 }, { "epoch": 0.6069043460392479, "grad_norm": 0.04994012042880058, "learning_rate": 0.01, "loss": 2.1664, "step": 5907 }, { "epoch": 0.6072125757731429, "grad_norm": 0.06299131363630295, "learning_rate": 0.01, "loss": 2.1669, "step": 5910 }, { "epoch": 0.6075208055070379, "grad_norm": 0.059428080916404724, "learning_rate": 0.01, "loss": 2.1731, "step": 5913 }, { "epoch": 0.6078290352409329, "grad_norm": 0.07036252319812775, "learning_rate": 0.01, "loss": 2.1787, "step": 5916 }, { "epoch": 0.6081372649748279, "grad_norm": 0.04721888527274132, "learning_rate": 0.01, "loss": 2.1531, "step": 5919 }, { "epoch": 0.6084454947087229, "grad_norm": 0.06953759491443634, "learning_rate": 0.01, "loss": 2.1573, "step": 5922 }, { "epoch": 0.6087537244426179, "grad_norm": 0.11679168790578842, "learning_rate": 0.01, "loss": 2.155, "step": 5925 }, { "epoch": 0.6090619541765129, "grad_norm": 0.09196575731039047, "learning_rate": 0.01, "loss": 2.1574, "step": 5928 }, { "epoch": 0.6093701839104079, "grad_norm": 0.05219469591975212, "learning_rate": 0.01, "loss": 2.1605, "step": 5931 }, { "epoch": 0.6096784136443029, "grad_norm": 0.09352759271860123, "learning_rate": 0.01, "loss": 2.1456, "step": 5934 }, { "epoch": 0.6099866433781979, "grad_norm": 0.07393237948417664, "learning_rate": 0.01, "loss": 2.1611, "step": 5937 }, { "epoch": 0.6102948731120929, "grad_norm": 0.06727741658687592, "learning_rate": 0.01, "loss": 2.1599, "step": 5940 }, { "epoch": 0.6106031028459878, "grad_norm": 0.09024669975042343, "learning_rate": 0.01, "loss": 2.1621, "step": 5943 }, { "epoch": 0.6109113325798828, "grad_norm": 0.04514656960964203, "learning_rate": 0.01, "loss": 2.1809, "step": 5946 }, { "epoch": 0.6112195623137778, "grad_norm": 0.04011565446853638, "learning_rate": 0.01, "loss": 2.1715, "step": 5949 }, { "epoch": 0.6115277920476728, "grad_norm": 0.04640655592083931, "learning_rate": 0.01, "loss": 2.15, "step": 5952 }, { "epoch": 0.6118360217815678, "grad_norm": 0.0471080057322979, "learning_rate": 0.01, "loss": 2.1805, "step": 5955 }, { "epoch": 0.6121442515154628, "grad_norm": 0.17398513853549957, "learning_rate": 0.01, "loss": 2.1497, "step": 5958 }, { "epoch": 0.6124524812493578, "grad_norm": 0.06299551576375961, "learning_rate": 0.01, "loss": 2.1387, "step": 5961 }, { "epoch": 0.6127607109832528, "grad_norm": 0.07517322897911072, "learning_rate": 0.01, "loss": 2.1348, "step": 5964 }, { "epoch": 0.6130689407171478, "grad_norm": 0.050515878945589066, "learning_rate": 0.01, "loss": 2.1725, "step": 5967 }, { "epoch": 0.6133771704510429, "grad_norm": 0.04682675376534462, "learning_rate": 0.01, "loss": 2.1759, "step": 5970 }, { "epoch": 0.6136854001849379, "grad_norm": 0.05297816917300224, "learning_rate": 0.01, "loss": 2.1491, "step": 5973 }, { "epoch": 0.6139936299188329, "grad_norm": 0.07467235624790192, "learning_rate": 0.01, "loss": 2.1556, "step": 5976 }, { "epoch": 0.6143018596527279, "grad_norm": 0.06621374934911728, "learning_rate": 0.01, "loss": 2.1498, "step": 5979 }, { "epoch": 0.6146100893866229, "grad_norm": 0.0538405105471611, "learning_rate": 0.01, "loss": 2.1694, "step": 5982 }, { "epoch": 0.6149183191205179, "grad_norm": 0.09891212731599808, "learning_rate": 0.01, "loss": 2.1598, "step": 5985 }, { "epoch": 0.6152265488544129, "grad_norm": 0.042064208537340164, "learning_rate": 0.01, "loss": 2.1375, "step": 5988 }, { "epoch": 0.6155347785883079, "grad_norm": 0.06750064343214035, "learning_rate": 0.01, "loss": 2.1371, "step": 5991 }, { "epoch": 0.6158430083222028, "grad_norm": 0.0626809298992157, "learning_rate": 0.01, "loss": 2.1455, "step": 5994 }, { "epoch": 0.6161512380560978, "grad_norm": 0.04291335120797157, "learning_rate": 0.01, "loss": 2.1397, "step": 5997 }, { "epoch": 0.6164594677899928, "grad_norm": 0.05945251137018204, "learning_rate": 0.01, "loss": 2.1346, "step": 6000 }, { "epoch": 0.6167676975238878, "grad_norm": 0.15699933469295502, "learning_rate": 0.01, "loss": 2.1384, "step": 6003 }, { "epoch": 0.6170759272577828, "grad_norm": 0.06863987445831299, "learning_rate": 0.01, "loss": 2.1401, "step": 6006 }, { "epoch": 0.6173841569916778, "grad_norm": 0.04850529506802559, "learning_rate": 0.01, "loss": 2.1637, "step": 6009 }, { "epoch": 0.6176923867255728, "grad_norm": 0.05660491809248924, "learning_rate": 0.01, "loss": 2.1721, "step": 6012 }, { "epoch": 0.6180006164594678, "grad_norm": 0.050568364560604095, "learning_rate": 0.01, "loss": 2.1676, "step": 6015 }, { "epoch": 0.6183088461933628, "grad_norm": 0.060765717178583145, "learning_rate": 0.01, "loss": 2.127, "step": 6018 }, { "epoch": 0.6186170759272578, "grad_norm": 0.0731448233127594, "learning_rate": 0.01, "loss": 2.1531, "step": 6021 }, { "epoch": 0.6189253056611528, "grad_norm": 0.055431608110666275, "learning_rate": 0.01, "loss": 2.1662, "step": 6024 }, { "epoch": 0.6192335353950478, "grad_norm": 0.05376220867037773, "learning_rate": 0.01, "loss": 2.1465, "step": 6027 }, { "epoch": 0.6195417651289428, "grad_norm": 0.09729186445474625, "learning_rate": 0.01, "loss": 2.161, "step": 6030 }, { "epoch": 0.6198499948628378, "grad_norm": 0.08046093583106995, "learning_rate": 0.01, "loss": 2.1435, "step": 6033 }, { "epoch": 0.6201582245967328, "grad_norm": 0.09514495730400085, "learning_rate": 0.01, "loss": 2.1511, "step": 6036 }, { "epoch": 0.6204664543306277, "grad_norm": 0.056993287056684494, "learning_rate": 0.01, "loss": 2.1439, "step": 6039 }, { "epoch": 0.6207746840645227, "grad_norm": 0.06429582834243774, "learning_rate": 0.01, "loss": 2.1393, "step": 6042 }, { "epoch": 0.6210829137984177, "grad_norm": 0.1299380660057068, "learning_rate": 0.01, "loss": 2.1831, "step": 6045 }, { "epoch": 0.6213911435323127, "grad_norm": 0.13815906643867493, "learning_rate": 0.01, "loss": 2.1645, "step": 6048 }, { "epoch": 0.6216993732662077, "grad_norm": 0.056314874440431595, "learning_rate": 0.01, "loss": 2.1417, "step": 6051 }, { "epoch": 0.6220076030001027, "grad_norm": 0.06146218627691269, "learning_rate": 0.01, "loss": 2.1418, "step": 6054 }, { "epoch": 0.6223158327339977, "grad_norm": 0.062167149037122726, "learning_rate": 0.01, "loss": 2.1778, "step": 6057 }, { "epoch": 0.6226240624678927, "grad_norm": 0.059581879526376724, "learning_rate": 0.01, "loss": 2.1725, "step": 6060 }, { "epoch": 0.6229322922017877, "grad_norm": 0.044389910995960236, "learning_rate": 0.01, "loss": 2.1553, "step": 6063 }, { "epoch": 0.6232405219356827, "grad_norm": 0.036525238305330276, "learning_rate": 0.01, "loss": 2.1545, "step": 6066 }, { "epoch": 0.6235487516695777, "grad_norm": 0.0995573177933693, "learning_rate": 0.01, "loss": 2.1566, "step": 6069 }, { "epoch": 0.6238569814034727, "grad_norm": 0.10412520170211792, "learning_rate": 0.01, "loss": 2.1525, "step": 6072 }, { "epoch": 0.6241652111373677, "grad_norm": 0.10417335480451584, "learning_rate": 0.01, "loss": 2.1535, "step": 6075 }, { "epoch": 0.6244734408712627, "grad_norm": 0.09024351090192795, "learning_rate": 0.01, "loss": 2.1551, "step": 6078 }, { "epoch": 0.6247816706051577, "grad_norm": 0.04889573156833649, "learning_rate": 0.01, "loss": 2.1549, "step": 6081 }, { "epoch": 0.6250899003390528, "grad_norm": 0.05154373124241829, "learning_rate": 0.01, "loss": 2.1461, "step": 6084 }, { "epoch": 0.6253981300729478, "grad_norm": 0.04337237402796745, "learning_rate": 0.01, "loss": 2.1733, "step": 6087 }, { "epoch": 0.6257063598068427, "grad_norm": 0.06173473224043846, "learning_rate": 0.01, "loss": 2.1657, "step": 6090 }, { "epoch": 0.6260145895407377, "grad_norm": 0.06174352392554283, "learning_rate": 0.01, "loss": 2.1528, "step": 6093 }, { "epoch": 0.6263228192746327, "grad_norm": 0.07301110029220581, "learning_rate": 0.01, "loss": 2.1489, "step": 6096 }, { "epoch": 0.6266310490085277, "grad_norm": 0.04265190288424492, "learning_rate": 0.01, "loss": 2.1624, "step": 6099 }, { "epoch": 0.6269392787424227, "grad_norm": 0.056723251938819885, "learning_rate": 0.01, "loss": 2.1624, "step": 6102 }, { "epoch": 0.6272475084763177, "grad_norm": 0.06809309124946594, "learning_rate": 0.01, "loss": 2.1525, "step": 6105 }, { "epoch": 0.6275557382102127, "grad_norm": 0.06820474565029144, "learning_rate": 0.01, "loss": 2.1472, "step": 6108 }, { "epoch": 0.6278639679441077, "grad_norm": 0.05961904302239418, "learning_rate": 0.01, "loss": 2.1561, "step": 6111 }, { "epoch": 0.6281721976780027, "grad_norm": 0.04617665335536003, "learning_rate": 0.01, "loss": 2.1475, "step": 6114 }, { "epoch": 0.6284804274118977, "grad_norm": 0.040670618414878845, "learning_rate": 0.01, "loss": 2.153, "step": 6117 }, { "epoch": 0.6287886571457927, "grad_norm": 0.09909021109342575, "learning_rate": 0.01, "loss": 2.141, "step": 6120 }, { "epoch": 0.6290968868796877, "grad_norm": 0.04966261237859726, "learning_rate": 0.01, "loss": 2.1264, "step": 6123 }, { "epoch": 0.6294051166135827, "grad_norm": 0.0570046491920948, "learning_rate": 0.01, "loss": 2.1572, "step": 6126 }, { "epoch": 0.6297133463474777, "grad_norm": 0.10374405980110168, "learning_rate": 0.01, "loss": 2.149, "step": 6129 }, { "epoch": 0.6300215760813727, "grad_norm": 0.061325542628765106, "learning_rate": 0.01, "loss": 2.1521, "step": 6132 }, { "epoch": 0.6303298058152677, "grad_norm": 0.16151310503482819, "learning_rate": 0.01, "loss": 2.1825, "step": 6135 }, { "epoch": 0.6306380355491626, "grad_norm": 0.0921199768781662, "learning_rate": 0.01, "loss": 2.1773, "step": 6138 }, { "epoch": 0.6309462652830576, "grad_norm": 0.05603238567709923, "learning_rate": 0.01, "loss": 2.1452, "step": 6141 }, { "epoch": 0.6312544950169526, "grad_norm": 0.12173126637935638, "learning_rate": 0.01, "loss": 2.1713, "step": 6144 }, { "epoch": 0.6315627247508476, "grad_norm": 0.04609265923500061, "learning_rate": 0.01, "loss": 2.1518, "step": 6147 }, { "epoch": 0.6318709544847426, "grad_norm": 0.06445127725601196, "learning_rate": 0.01, "loss": 2.162, "step": 6150 }, { "epoch": 0.6321791842186376, "grad_norm": 0.05396106466650963, "learning_rate": 0.01, "loss": 2.1248, "step": 6153 }, { "epoch": 0.6324874139525326, "grad_norm": 0.06955734640359879, "learning_rate": 0.01, "loss": 2.1497, "step": 6156 }, { "epoch": 0.6327956436864276, "grad_norm": 0.04371445253491402, "learning_rate": 0.01, "loss": 2.1167, "step": 6159 }, { "epoch": 0.6331038734203226, "grad_norm": 0.07146921008825302, "learning_rate": 0.01, "loss": 2.1633, "step": 6162 }, { "epoch": 0.6334121031542176, "grad_norm": 0.08056561648845673, "learning_rate": 0.01, "loss": 2.1506, "step": 6165 }, { "epoch": 0.6337203328881126, "grad_norm": 0.08875605463981628, "learning_rate": 0.01, "loss": 2.1834, "step": 6168 }, { "epoch": 0.6340285626220076, "grad_norm": 0.05090434104204178, "learning_rate": 0.01, "loss": 2.1514, "step": 6171 }, { "epoch": 0.6343367923559026, "grad_norm": 0.11710961163043976, "learning_rate": 0.01, "loss": 2.1589, "step": 6174 }, { "epoch": 0.6346450220897976, "grad_norm": 0.04704523831605911, "learning_rate": 0.01, "loss": 2.1469, "step": 6177 }, { "epoch": 0.6349532518236926, "grad_norm": 0.045143596827983856, "learning_rate": 0.01, "loss": 2.1311, "step": 6180 }, { "epoch": 0.6352614815575875, "grad_norm": 0.04246919974684715, "learning_rate": 0.01, "loss": 2.1481, "step": 6183 }, { "epoch": 0.6355697112914825, "grad_norm": 0.04303867742419243, "learning_rate": 0.01, "loss": 2.1557, "step": 6186 }, { "epoch": 0.6358779410253775, "grad_norm": 0.17376503348350525, "learning_rate": 0.01, "loss": 2.1616, "step": 6189 }, { "epoch": 0.6361861707592725, "grad_norm": 0.11983154714107513, "learning_rate": 0.01, "loss": 2.1569, "step": 6192 }, { "epoch": 0.6364944004931675, "grad_norm": 0.0443497858941555, "learning_rate": 0.01, "loss": 2.1454, "step": 6195 }, { "epoch": 0.6368026302270625, "grad_norm": 0.04241250827908516, "learning_rate": 0.01, "loss": 2.1409, "step": 6198 }, { "epoch": 0.6371108599609576, "grad_norm": 0.07058902829885483, "learning_rate": 0.01, "loss": 2.1246, "step": 6201 }, { "epoch": 0.6374190896948526, "grad_norm": 0.060852985829114914, "learning_rate": 0.01, "loss": 2.1512, "step": 6204 }, { "epoch": 0.6377273194287476, "grad_norm": 0.058703117072582245, "learning_rate": 0.01, "loss": 2.1114, "step": 6207 }, { "epoch": 0.6380355491626426, "grad_norm": 0.08501632511615753, "learning_rate": 0.01, "loss": 2.1818, "step": 6210 }, { "epoch": 0.6383437788965376, "grad_norm": 0.07715412974357605, "learning_rate": 0.01, "loss": 2.1661, "step": 6213 }, { "epoch": 0.6386520086304326, "grad_norm": 0.06822165101766586, "learning_rate": 0.01, "loss": 2.1652, "step": 6216 }, { "epoch": 0.6389602383643276, "grad_norm": 0.048459213227033615, "learning_rate": 0.01, "loss": 2.1311, "step": 6219 }, { "epoch": 0.6392684680982226, "grad_norm": 0.08208850026130676, "learning_rate": 0.01, "loss": 2.1316, "step": 6222 }, { "epoch": 0.6395766978321176, "grad_norm": 0.06399821490049362, "learning_rate": 0.01, "loss": 2.1354, "step": 6225 }, { "epoch": 0.6398849275660126, "grad_norm": 0.12036826461553574, "learning_rate": 0.01, "loss": 2.1509, "step": 6228 }, { "epoch": 0.6401931572999076, "grad_norm": 0.08180755376815796, "learning_rate": 0.01, "loss": 2.1571, "step": 6231 }, { "epoch": 0.6405013870338025, "grad_norm": 0.053771521896123886, "learning_rate": 0.01, "loss": 2.1485, "step": 6234 }, { "epoch": 0.6408096167676975, "grad_norm": 0.042291607707738876, "learning_rate": 0.01, "loss": 2.1606, "step": 6237 }, { "epoch": 0.6411178465015925, "grad_norm": 0.044655315577983856, "learning_rate": 0.01, "loss": 2.1592, "step": 6240 }, { "epoch": 0.6414260762354875, "grad_norm": 0.07763859629631042, "learning_rate": 0.01, "loss": 2.1543, "step": 6243 }, { "epoch": 0.6417343059693825, "grad_norm": 0.055368274450302124, "learning_rate": 0.01, "loss": 2.1643, "step": 6246 }, { "epoch": 0.6420425357032775, "grad_norm": 0.047774944454431534, "learning_rate": 0.01, "loss": 2.1542, "step": 6249 }, { "epoch": 0.6423507654371725, "grad_norm": 0.06478223204612732, "learning_rate": 0.01, "loss": 2.1501, "step": 6252 }, { "epoch": 0.6426589951710675, "grad_norm": 0.03782160207629204, "learning_rate": 0.01, "loss": 2.1455, "step": 6255 }, { "epoch": 0.6429672249049625, "grad_norm": 0.11297930777072906, "learning_rate": 0.01, "loss": 2.1667, "step": 6258 }, { "epoch": 0.6432754546388575, "grad_norm": 0.09408997744321823, "learning_rate": 0.01, "loss": 2.146, "step": 6261 }, { "epoch": 0.6435836843727525, "grad_norm": 0.06677352637052536, "learning_rate": 0.01, "loss": 2.1723, "step": 6264 }, { "epoch": 0.6438919141066475, "grad_norm": 0.08687873184680939, "learning_rate": 0.01, "loss": 2.1517, "step": 6267 }, { "epoch": 0.6442001438405425, "grad_norm": 0.06850516051054001, "learning_rate": 0.01, "loss": 2.148, "step": 6270 }, { "epoch": 0.6445083735744375, "grad_norm": 0.07705084979534149, "learning_rate": 0.01, "loss": 2.1567, "step": 6273 }, { "epoch": 0.6448166033083325, "grad_norm": 0.1622423529624939, "learning_rate": 0.01, "loss": 2.1676, "step": 6276 }, { "epoch": 0.6451248330422275, "grad_norm": 0.11197759211063385, "learning_rate": 0.01, "loss": 2.1376, "step": 6279 }, { "epoch": 0.6454330627761224, "grad_norm": 0.06562814861536026, "learning_rate": 0.01, "loss": 2.1652, "step": 6282 }, { "epoch": 0.6457412925100174, "grad_norm": 0.0867902860045433, "learning_rate": 0.01, "loss": 2.1733, "step": 6285 }, { "epoch": 0.6460495222439124, "grad_norm": 0.08153738081455231, "learning_rate": 0.01, "loss": 2.1442, "step": 6288 }, { "epoch": 0.6463577519778074, "grad_norm": 0.09800709784030914, "learning_rate": 0.01, "loss": 2.1262, "step": 6291 }, { "epoch": 0.6466659817117024, "grad_norm": 0.07728230953216553, "learning_rate": 0.01, "loss": 2.139, "step": 6294 }, { "epoch": 0.6469742114455974, "grad_norm": 0.09658671170473099, "learning_rate": 0.01, "loss": 2.1421, "step": 6297 }, { "epoch": 0.6472824411794924, "grad_norm": 0.0448787659406662, "learning_rate": 0.01, "loss": 2.1415, "step": 6300 }, { "epoch": 0.6475906709133874, "grad_norm": 0.03848707675933838, "learning_rate": 0.01, "loss": 2.1209, "step": 6303 }, { "epoch": 0.6478989006472824, "grad_norm": 0.07465004920959473, "learning_rate": 0.01, "loss": 2.1395, "step": 6306 }, { "epoch": 0.6482071303811774, "grad_norm": 0.060424912720918655, "learning_rate": 0.01, "loss": 2.1806, "step": 6309 }, { "epoch": 0.6485153601150724, "grad_norm": 0.05204974114894867, "learning_rate": 0.01, "loss": 2.1287, "step": 6312 }, { "epoch": 0.6488235898489675, "grad_norm": 0.06045055389404297, "learning_rate": 0.01, "loss": 2.1727, "step": 6315 }, { "epoch": 0.6491318195828625, "grad_norm": 0.04978582262992859, "learning_rate": 0.01, "loss": 2.1264, "step": 6318 }, { "epoch": 0.6494400493167575, "grad_norm": 0.08131048828363419, "learning_rate": 0.01, "loss": 2.137, "step": 6321 }, { "epoch": 0.6497482790506525, "grad_norm": 0.09749994426965714, "learning_rate": 0.01, "loss": 2.1557, "step": 6324 }, { "epoch": 0.6500565087845475, "grad_norm": 0.06079535186290741, "learning_rate": 0.01, "loss": 2.1432, "step": 6327 }, { "epoch": 0.6503647385184425, "grad_norm": 0.08241060376167297, "learning_rate": 0.01, "loss": 2.1551, "step": 6330 }, { "epoch": 0.6506729682523374, "grad_norm": 0.12339378148317337, "learning_rate": 0.01, "loss": 2.1216, "step": 6333 }, { "epoch": 0.6509811979862324, "grad_norm": 0.0660511702299118, "learning_rate": 0.01, "loss": 2.1156, "step": 6336 }, { "epoch": 0.6512894277201274, "grad_norm": 0.06279938668012619, "learning_rate": 0.01, "loss": 2.1778, "step": 6339 }, { "epoch": 0.6515976574540224, "grad_norm": 0.068712018430233, "learning_rate": 0.01, "loss": 2.1348, "step": 6342 }, { "epoch": 0.6519058871879174, "grad_norm": 0.05808734893798828, "learning_rate": 0.01, "loss": 2.135, "step": 6345 }, { "epoch": 0.6522141169218124, "grad_norm": 0.044942643493413925, "learning_rate": 0.01, "loss": 2.1613, "step": 6348 }, { "epoch": 0.6525223466557074, "grad_norm": 0.11666214466094971, "learning_rate": 0.01, "loss": 2.1399, "step": 6351 }, { "epoch": 0.6528305763896024, "grad_norm": 0.06776747852563858, "learning_rate": 0.01, "loss": 2.1369, "step": 6354 }, { "epoch": 0.6531388061234974, "grad_norm": 0.10171874612569809, "learning_rate": 0.01, "loss": 2.1273, "step": 6357 }, { "epoch": 0.6534470358573924, "grad_norm": 0.04611232131719589, "learning_rate": 0.01, "loss": 2.1482, "step": 6360 }, { "epoch": 0.6537552655912874, "grad_norm": 0.042139992117881775, "learning_rate": 0.01, "loss": 2.1235, "step": 6363 }, { "epoch": 0.6540634953251824, "grad_norm": 0.057816632091999054, "learning_rate": 0.01, "loss": 2.1449, "step": 6366 }, { "epoch": 0.6543717250590774, "grad_norm": 0.11400949209928513, "learning_rate": 0.01, "loss": 2.1566, "step": 6369 }, { "epoch": 0.6546799547929724, "grad_norm": 0.07320736348628998, "learning_rate": 0.01, "loss": 2.1682, "step": 6372 }, { "epoch": 0.6549881845268674, "grad_norm": 0.07262291014194489, "learning_rate": 0.01, "loss": 2.1514, "step": 6375 }, { "epoch": 0.6552964142607623, "grad_norm": 0.05559679865837097, "learning_rate": 0.01, "loss": 2.1347, "step": 6378 }, { "epoch": 0.6556046439946573, "grad_norm": 0.049424149096012115, "learning_rate": 0.01, "loss": 2.1423, "step": 6381 }, { "epoch": 0.6559128737285523, "grad_norm": 0.05457301065325737, "learning_rate": 0.01, "loss": 2.1425, "step": 6384 }, { "epoch": 0.6562211034624473, "grad_norm": 0.058564141392707825, "learning_rate": 0.01, "loss": 2.1158, "step": 6387 }, { "epoch": 0.6565293331963423, "grad_norm": 0.10944786667823792, "learning_rate": 0.01, "loss": 2.146, "step": 6390 }, { "epoch": 0.6568375629302373, "grad_norm": 0.07760695368051529, "learning_rate": 0.01, "loss": 2.176, "step": 6393 }, { "epoch": 0.6571457926641323, "grad_norm": 0.07621042430400848, "learning_rate": 0.01, "loss": 2.1779, "step": 6396 }, { "epoch": 0.6574540223980273, "grad_norm": 0.09723789244890213, "learning_rate": 0.01, "loss": 2.1455, "step": 6399 }, { "epoch": 0.6577622521319223, "grad_norm": 0.05648832768201828, "learning_rate": 0.01, "loss": 2.154, "step": 6402 }, { "epoch": 0.6580704818658173, "grad_norm": 0.04370080679655075, "learning_rate": 0.01, "loss": 2.1374, "step": 6405 }, { "epoch": 0.6583787115997123, "grad_norm": 0.03729141131043434, "learning_rate": 0.01, "loss": 2.1275, "step": 6408 }, { "epoch": 0.6586869413336073, "grad_norm": 0.055584125220775604, "learning_rate": 0.01, "loss": 2.1442, "step": 6411 }, { "epoch": 0.6589951710675023, "grad_norm": 0.07981918007135391, "learning_rate": 0.01, "loss": 2.1618, "step": 6414 }, { "epoch": 0.6593034008013973, "grad_norm": 0.09241674095392227, "learning_rate": 0.01, "loss": 2.1519, "step": 6417 }, { "epoch": 0.6596116305352923, "grad_norm": 0.10454630106687546, "learning_rate": 0.01, "loss": 2.1309, "step": 6420 }, { "epoch": 0.6599198602691873, "grad_norm": 0.08674053847789764, "learning_rate": 0.01, "loss": 2.1617, "step": 6423 }, { "epoch": 0.6602280900030822, "grad_norm": 0.06003529578447342, "learning_rate": 0.01, "loss": 2.1475, "step": 6426 }, { "epoch": 0.6605363197369772, "grad_norm": 0.07370956987142563, "learning_rate": 0.01, "loss": 2.1466, "step": 6429 }, { "epoch": 0.6608445494708723, "grad_norm": 0.05090004578232765, "learning_rate": 0.01, "loss": 2.1506, "step": 6432 }, { "epoch": 0.6611527792047673, "grad_norm": 0.06062362715601921, "learning_rate": 0.01, "loss": 2.1601, "step": 6435 }, { "epoch": 0.6614610089386623, "grad_norm": 0.05484107881784439, "learning_rate": 0.01, "loss": 2.1452, "step": 6438 }, { "epoch": 0.6617692386725573, "grad_norm": 0.1367156058549881, "learning_rate": 0.01, "loss": 2.1586, "step": 6441 }, { "epoch": 0.6620774684064523, "grad_norm": 0.05140338093042374, "learning_rate": 0.01, "loss": 2.1463, "step": 6444 }, { "epoch": 0.6623856981403473, "grad_norm": 0.09168683737516403, "learning_rate": 0.01, "loss": 2.1467, "step": 6447 }, { "epoch": 0.6626939278742423, "grad_norm": 0.04098822921514511, "learning_rate": 0.01, "loss": 2.1648, "step": 6450 }, { "epoch": 0.6630021576081373, "grad_norm": 0.049763478338718414, "learning_rate": 0.01, "loss": 2.1289, "step": 6453 }, { "epoch": 0.6633103873420323, "grad_norm": 0.060069404542446136, "learning_rate": 0.01, "loss": 2.1467, "step": 6456 }, { "epoch": 0.6636186170759273, "grad_norm": 0.06611450761556625, "learning_rate": 0.01, "loss": 2.1599, "step": 6459 }, { "epoch": 0.6639268468098223, "grad_norm": 0.04955270141363144, "learning_rate": 0.01, "loss": 2.136, "step": 6462 }, { "epoch": 0.6642350765437173, "grad_norm": 0.04004522040486336, "learning_rate": 0.01, "loss": 2.1457, "step": 6465 }, { "epoch": 0.6645433062776123, "grad_norm": 0.06539756804704666, "learning_rate": 0.01, "loss": 2.1458, "step": 6468 }, { "epoch": 0.6648515360115073, "grad_norm": 0.10684728622436523, "learning_rate": 0.01, "loss": 2.1279, "step": 6471 }, { "epoch": 0.6651597657454023, "grad_norm": 0.09936464577913284, "learning_rate": 0.01, "loss": 2.1767, "step": 6474 }, { "epoch": 0.6654679954792972, "grad_norm": 0.04908827692270279, "learning_rate": 0.01, "loss": 2.1259, "step": 6477 }, { "epoch": 0.6657762252131922, "grad_norm": 0.048053622245788574, "learning_rate": 0.01, "loss": 2.1718, "step": 6480 }, { "epoch": 0.6660844549470872, "grad_norm": 0.05524458363652229, "learning_rate": 0.01, "loss": 2.1673, "step": 6483 }, { "epoch": 0.6663926846809822, "grad_norm": 0.05107030272483826, "learning_rate": 0.01, "loss": 2.13, "step": 6486 }, { "epoch": 0.6667009144148772, "grad_norm": 0.12472579628229141, "learning_rate": 0.01, "loss": 2.149, "step": 6489 }, { "epoch": 0.6670091441487722, "grad_norm": 0.05257454514503479, "learning_rate": 0.01, "loss": 2.1343, "step": 6492 }, { "epoch": 0.6673173738826672, "grad_norm": 0.05986837297677994, "learning_rate": 0.01, "loss": 2.1265, "step": 6495 }, { "epoch": 0.6676256036165622, "grad_norm": 0.08322940021753311, "learning_rate": 0.01, "loss": 2.1317, "step": 6498 }, { "epoch": 0.6679338333504572, "grad_norm": 0.0466473363339901, "learning_rate": 0.01, "loss": 2.1235, "step": 6501 }, { "epoch": 0.6682420630843522, "grad_norm": 0.05092160776257515, "learning_rate": 0.01, "loss": 2.1672, "step": 6504 }, { "epoch": 0.6685502928182472, "grad_norm": 0.08392294496297836, "learning_rate": 0.01, "loss": 2.1473, "step": 6507 }, { "epoch": 0.6688585225521422, "grad_norm": 0.042165517807006836, "learning_rate": 0.01, "loss": 2.1181, "step": 6510 }, { "epoch": 0.6691667522860372, "grad_norm": 0.06214481219649315, "learning_rate": 0.01, "loss": 2.138, "step": 6513 }, { "epoch": 0.6694749820199322, "grad_norm": 0.06087846681475639, "learning_rate": 0.01, "loss": 2.15, "step": 6516 }, { "epoch": 0.6697832117538272, "grad_norm": 0.047256652265787125, "learning_rate": 0.01, "loss": 2.1433, "step": 6519 }, { "epoch": 0.6700914414877222, "grad_norm": 0.10626421123743057, "learning_rate": 0.01, "loss": 2.156, "step": 6522 }, { "epoch": 0.6703996712216171, "grad_norm": 0.09426552802324295, "learning_rate": 0.01, "loss": 2.1472, "step": 6525 }, { "epoch": 0.6707079009555121, "grad_norm": 0.0632442831993103, "learning_rate": 0.01, "loss": 2.1536, "step": 6528 }, { "epoch": 0.6710161306894071, "grad_norm": 0.07149971276521683, "learning_rate": 0.01, "loss": 2.1694, "step": 6531 }, { "epoch": 0.6713243604233021, "grad_norm": 0.04060966521501541, "learning_rate": 0.01, "loss": 2.164, "step": 6534 }, { "epoch": 0.6716325901571971, "grad_norm": 0.20043891668319702, "learning_rate": 0.01, "loss": 2.125, "step": 6537 }, { "epoch": 0.6719408198910921, "grad_norm": 0.06755783408880234, "learning_rate": 0.01, "loss": 2.15, "step": 6540 }, { "epoch": 0.6722490496249871, "grad_norm": 0.0509268082678318, "learning_rate": 0.01, "loss": 2.1405, "step": 6543 }, { "epoch": 0.6725572793588821, "grad_norm": 0.04033916816115379, "learning_rate": 0.01, "loss": 2.136, "step": 6546 }, { "epoch": 0.6728655090927772, "grad_norm": 0.04707946255803108, "learning_rate": 0.01, "loss": 2.1514, "step": 6549 }, { "epoch": 0.6731737388266722, "grad_norm": 0.04360898956656456, "learning_rate": 0.01, "loss": 2.1518, "step": 6552 }, { "epoch": 0.6734819685605672, "grad_norm": 0.11959343403577805, "learning_rate": 0.01, "loss": 2.1377, "step": 6555 }, { "epoch": 0.6737901982944622, "grad_norm": 0.06620760262012482, "learning_rate": 0.01, "loss": 2.1419, "step": 6558 }, { "epoch": 0.6740984280283572, "grad_norm": 0.056747015565633774, "learning_rate": 0.01, "loss": 2.138, "step": 6561 }, { "epoch": 0.6744066577622522, "grad_norm": 0.05230560526251793, "learning_rate": 0.01, "loss": 2.1335, "step": 6564 }, { "epoch": 0.6747148874961472, "grad_norm": 0.0526299811899662, "learning_rate": 0.01, "loss": 2.131, "step": 6567 }, { "epoch": 0.6750231172300422, "grad_norm": 0.15683774650096893, "learning_rate": 0.01, "loss": 2.1167, "step": 6570 }, { "epoch": 0.6753313469639372, "grad_norm": 0.10133557766675949, "learning_rate": 0.01, "loss": 2.1219, "step": 6573 }, { "epoch": 0.6756395766978321, "grad_norm": 0.06826774775981903, "learning_rate": 0.01, "loss": 2.1416, "step": 6576 }, { "epoch": 0.6759478064317271, "grad_norm": 0.046236682683229446, "learning_rate": 0.01, "loss": 2.1704, "step": 6579 }, { "epoch": 0.6762560361656221, "grad_norm": 0.07654762268066406, "learning_rate": 0.01, "loss": 2.1411, "step": 6582 }, { "epoch": 0.6765642658995171, "grad_norm": 0.07760706543922424, "learning_rate": 0.01, "loss": 2.168, "step": 6585 }, { "epoch": 0.6768724956334121, "grad_norm": 0.04213540256023407, "learning_rate": 0.01, "loss": 2.1899, "step": 6588 }, { "epoch": 0.6771807253673071, "grad_norm": 0.0517420619726181, "learning_rate": 0.01, "loss": 2.1561, "step": 6591 }, { "epoch": 0.6774889551012021, "grad_norm": 0.04073292762041092, "learning_rate": 0.01, "loss": 2.1475, "step": 6594 }, { "epoch": 0.6777971848350971, "grad_norm": 0.11223835498094559, "learning_rate": 0.01, "loss": 2.1102, "step": 6597 }, { "epoch": 0.6781054145689921, "grad_norm": 0.08094224333763123, "learning_rate": 0.01, "loss": 2.1537, "step": 6600 }, { "epoch": 0.6784136443028871, "grad_norm": 0.036313675343990326, "learning_rate": 0.01, "loss": 2.1471, "step": 6603 }, { "epoch": 0.6787218740367821, "grad_norm": 0.09553749114274979, "learning_rate": 0.01, "loss": 2.1445, "step": 6606 }, { "epoch": 0.6790301037706771, "grad_norm": 0.07334265112876892, "learning_rate": 0.01, "loss": 2.1594, "step": 6609 }, { "epoch": 0.6793383335045721, "grad_norm": 0.12031051516532898, "learning_rate": 0.01, "loss": 2.1321, "step": 6612 }, { "epoch": 0.6796465632384671, "grad_norm": 0.08834968507289886, "learning_rate": 0.01, "loss": 2.1474, "step": 6615 }, { "epoch": 0.679954792972362, "grad_norm": 0.05016850307583809, "learning_rate": 0.01, "loss": 2.1582, "step": 6618 }, { "epoch": 0.680263022706257, "grad_norm": 0.039213377982378006, "learning_rate": 0.01, "loss": 2.1461, "step": 6621 }, { "epoch": 0.680571252440152, "grad_norm": 0.035611145198345184, "learning_rate": 0.01, "loss": 2.137, "step": 6624 }, { "epoch": 0.680879482174047, "grad_norm": 0.09345167875289917, "learning_rate": 0.01, "loss": 2.1357, "step": 6627 }, { "epoch": 0.681187711907942, "grad_norm": 0.04311450198292732, "learning_rate": 0.01, "loss": 2.1413, "step": 6630 }, { "epoch": 0.681495941641837, "grad_norm": 0.040315765887498856, "learning_rate": 0.01, "loss": 2.1091, "step": 6633 }, { "epoch": 0.681804171375732, "grad_norm": 0.11044291406869888, "learning_rate": 0.01, "loss": 2.1392, "step": 6636 }, { "epoch": 0.682112401109627, "grad_norm": 0.1288553774356842, "learning_rate": 0.01, "loss": 2.1129, "step": 6639 }, { "epoch": 0.682420630843522, "grad_norm": 0.0698169469833374, "learning_rate": 0.01, "loss": 2.137, "step": 6642 }, { "epoch": 0.682728860577417, "grad_norm": 0.037890784442424774, "learning_rate": 0.01, "loss": 2.1195, "step": 6645 }, { "epoch": 0.683037090311312, "grad_norm": 0.07425201684236526, "learning_rate": 0.01, "loss": 2.1194, "step": 6648 }, { "epoch": 0.683345320045207, "grad_norm": 0.058168716728687286, "learning_rate": 0.01, "loss": 2.1371, "step": 6651 }, { "epoch": 0.683653549779102, "grad_norm": 0.05515358969569206, "learning_rate": 0.01, "loss": 2.137, "step": 6654 }, { "epoch": 0.683961779512997, "grad_norm": 0.0501445047557354, "learning_rate": 0.01, "loss": 2.1539, "step": 6657 }, { "epoch": 0.684270009246892, "grad_norm": 0.06167145445942879, "learning_rate": 0.01, "loss": 2.1413, "step": 6660 }, { "epoch": 0.6845782389807871, "grad_norm": 0.0841723158955574, "learning_rate": 0.01, "loss": 2.1194, "step": 6663 }, { "epoch": 0.6848864687146821, "grad_norm": 0.06027607619762421, "learning_rate": 0.01, "loss": 2.158, "step": 6666 }, { "epoch": 0.685194698448577, "grad_norm": 0.1187741607427597, "learning_rate": 0.01, "loss": 2.1651, "step": 6669 }, { "epoch": 0.685502928182472, "grad_norm": 0.10789939761161804, "learning_rate": 0.01, "loss": 2.1465, "step": 6672 }, { "epoch": 0.685811157916367, "grad_norm": 0.06254967302083969, "learning_rate": 0.01, "loss": 2.1639, "step": 6675 }, { "epoch": 0.686119387650262, "grad_norm": 0.04242802783846855, "learning_rate": 0.01, "loss": 2.1563, "step": 6678 }, { "epoch": 0.686427617384157, "grad_norm": 0.03538980334997177, "learning_rate": 0.01, "loss": 2.1373, "step": 6681 }, { "epoch": 0.686735847118052, "grad_norm": 0.04609490931034088, "learning_rate": 0.01, "loss": 2.1345, "step": 6684 }, { "epoch": 0.687044076851947, "grad_norm": 0.1298975795507431, "learning_rate": 0.01, "loss": 2.1446, "step": 6687 }, { "epoch": 0.687352306585842, "grad_norm": 0.10049281269311905, "learning_rate": 0.01, "loss": 2.1432, "step": 6690 }, { "epoch": 0.687660536319737, "grad_norm": 0.05908266827464104, "learning_rate": 0.01, "loss": 2.1288, "step": 6693 }, { "epoch": 0.687968766053632, "grad_norm": 0.0546141043305397, "learning_rate": 0.01, "loss": 2.1086, "step": 6696 }, { "epoch": 0.688276995787527, "grad_norm": 0.04135862737894058, "learning_rate": 0.01, "loss": 2.1187, "step": 6699 }, { "epoch": 0.688585225521422, "grad_norm": 0.03824761137366295, "learning_rate": 0.01, "loss": 2.1162, "step": 6702 }, { "epoch": 0.688893455255317, "grad_norm": 0.041454900056123734, "learning_rate": 0.01, "loss": 2.1304, "step": 6705 }, { "epoch": 0.689201684989212, "grad_norm": 0.08948934823274612, "learning_rate": 0.01, "loss": 2.1538, "step": 6708 }, { "epoch": 0.689509914723107, "grad_norm": 0.07379783689975739, "learning_rate": 0.01, "loss": 2.145, "step": 6711 }, { "epoch": 0.689818144457002, "grad_norm": 0.0833912044763565, "learning_rate": 0.01, "loss": 2.1218, "step": 6714 }, { "epoch": 0.690126374190897, "grad_norm": 0.05899098515510559, "learning_rate": 0.01, "loss": 2.1516, "step": 6717 }, { "epoch": 0.690434603924792, "grad_norm": 0.06462058424949646, "learning_rate": 0.01, "loss": 2.1496, "step": 6720 }, { "epoch": 0.6907428336586869, "grad_norm": 0.04040443152189255, "learning_rate": 0.01, "loss": 2.1311, "step": 6723 }, { "epoch": 0.6910510633925819, "grad_norm": 0.05336814373731613, "learning_rate": 0.01, "loss": 2.1227, "step": 6726 }, { "epoch": 0.6913592931264769, "grad_norm": 0.05057406798005104, "learning_rate": 0.01, "loss": 2.1281, "step": 6729 }, { "epoch": 0.6916675228603719, "grad_norm": 0.08063513040542603, "learning_rate": 0.01, "loss": 2.1318, "step": 6732 }, { "epoch": 0.6919757525942669, "grad_norm": 0.08304840326309204, "learning_rate": 0.01, "loss": 2.1179, "step": 6735 }, { "epoch": 0.6922839823281619, "grad_norm": 0.04266434162855148, "learning_rate": 0.01, "loss": 2.1447, "step": 6738 }, { "epoch": 0.6925922120620569, "grad_norm": 0.07502007484436035, "learning_rate": 0.01, "loss": 2.1173, "step": 6741 }, { "epoch": 0.6929004417959519, "grad_norm": 0.10870220512151718, "learning_rate": 0.01, "loss": 2.1555, "step": 6744 }, { "epoch": 0.6932086715298469, "grad_norm": 0.15824924409389496, "learning_rate": 0.01, "loss": 2.1668, "step": 6747 }, { "epoch": 0.6935169012637419, "grad_norm": 0.06319935619831085, "learning_rate": 0.01, "loss": 2.1788, "step": 6750 }, { "epoch": 0.6938251309976369, "grad_norm": 0.06392507255077362, "learning_rate": 0.01, "loss": 2.1398, "step": 6753 }, { "epoch": 0.6941333607315319, "grad_norm": 0.044481996446847916, "learning_rate": 0.01, "loss": 2.147, "step": 6756 }, { "epoch": 0.6944415904654269, "grad_norm": 0.09093592315912247, "learning_rate": 0.01, "loss": 2.1399, "step": 6759 }, { "epoch": 0.6947498201993219, "grad_norm": 0.09249415248632431, "learning_rate": 0.01, "loss": 2.1274, "step": 6762 }, { "epoch": 0.6950580499332168, "grad_norm": 0.06134162098169327, "learning_rate": 0.01, "loss": 2.142, "step": 6765 }, { "epoch": 0.6953662796671118, "grad_norm": 0.048883359879255295, "learning_rate": 0.01, "loss": 2.1357, "step": 6768 }, { "epoch": 0.6956745094010068, "grad_norm": 0.04553356394171715, "learning_rate": 0.01, "loss": 2.132, "step": 6771 }, { "epoch": 0.6959827391349018, "grad_norm": 0.10365505516529083, "learning_rate": 0.01, "loss": 2.1568, "step": 6774 }, { "epoch": 0.6962909688687968, "grad_norm": 0.07474958896636963, "learning_rate": 0.01, "loss": 2.1209, "step": 6777 }, { "epoch": 0.6965991986026919, "grad_norm": 0.11140461266040802, "learning_rate": 0.01, "loss": 2.1585, "step": 6780 }, { "epoch": 0.6969074283365869, "grad_norm": 0.0529690645635128, "learning_rate": 0.01, "loss": 2.1228, "step": 6783 }, { "epoch": 0.6972156580704819, "grad_norm": 0.06484264135360718, "learning_rate": 0.01, "loss": 2.117, "step": 6786 }, { "epoch": 0.6975238878043769, "grad_norm": 0.0467400886118412, "learning_rate": 0.01, "loss": 2.1367, "step": 6789 }, { "epoch": 0.6978321175382719, "grad_norm": 0.09690822660923004, "learning_rate": 0.01, "loss": 2.1275, "step": 6792 }, { "epoch": 0.6981403472721669, "grad_norm": 0.053299982100725174, "learning_rate": 0.01, "loss": 2.1557, "step": 6795 }, { "epoch": 0.6984485770060619, "grad_norm": 0.08451724797487259, "learning_rate": 0.01, "loss": 2.1235, "step": 6798 }, { "epoch": 0.6987568067399569, "grad_norm": 0.11180119216442108, "learning_rate": 0.01, "loss": 2.1389, "step": 6801 }, { "epoch": 0.6990650364738519, "grad_norm": 0.04366112872958183, "learning_rate": 0.01, "loss": 2.1345, "step": 6804 }, { "epoch": 0.6993732662077469, "grad_norm": 0.057021014392375946, "learning_rate": 0.01, "loss": 2.145, "step": 6807 }, { "epoch": 0.6996814959416419, "grad_norm": 0.050035975873470306, "learning_rate": 0.01, "loss": 2.1245, "step": 6810 }, { "epoch": 0.6999897256755369, "grad_norm": 0.16434957087039948, "learning_rate": 0.01, "loss": 2.099, "step": 6813 }, { "epoch": 0.7002979554094318, "grad_norm": 0.0473979152739048, "learning_rate": 0.01, "loss": 2.124, "step": 6816 }, { "epoch": 0.7006061851433268, "grad_norm": 0.06207640469074249, "learning_rate": 0.01, "loss": 2.1528, "step": 6819 }, { "epoch": 0.7009144148772218, "grad_norm": 0.09829109162092209, "learning_rate": 0.01, "loss": 2.1359, "step": 6822 }, { "epoch": 0.7012226446111168, "grad_norm": 0.0563257597386837, "learning_rate": 0.01, "loss": 2.1639, "step": 6825 }, { "epoch": 0.7015308743450118, "grad_norm": 0.12371699512004852, "learning_rate": 0.01, "loss": 2.1479, "step": 6828 }, { "epoch": 0.7018391040789068, "grad_norm": 0.07342347502708435, "learning_rate": 0.01, "loss": 2.1786, "step": 6831 }, { "epoch": 0.7021473338128018, "grad_norm": 0.05420146882534027, "learning_rate": 0.01, "loss": 2.1261, "step": 6834 }, { "epoch": 0.7024555635466968, "grad_norm": 0.04500873014330864, "learning_rate": 0.01, "loss": 2.1356, "step": 6837 }, { "epoch": 0.7027637932805918, "grad_norm": 0.10648415237665176, "learning_rate": 0.01, "loss": 2.1205, "step": 6840 }, { "epoch": 0.7030720230144868, "grad_norm": 0.05089351162314415, "learning_rate": 0.01, "loss": 2.1403, "step": 6843 }, { "epoch": 0.7033802527483818, "grad_norm": 0.10011807084083557, "learning_rate": 0.01, "loss": 2.1508, "step": 6846 }, { "epoch": 0.7036884824822768, "grad_norm": 0.06787194311618805, "learning_rate": 0.01, "loss": 2.1391, "step": 6849 }, { "epoch": 0.7039967122161718, "grad_norm": 0.08248817175626755, "learning_rate": 0.01, "loss": 2.1782, "step": 6852 }, { "epoch": 0.7043049419500668, "grad_norm": 0.04949905723333359, "learning_rate": 0.01, "loss": 2.1401, "step": 6855 }, { "epoch": 0.7046131716839618, "grad_norm": 0.043910931795835495, "learning_rate": 0.01, "loss": 2.108, "step": 6858 }, { "epoch": 0.7049214014178568, "grad_norm": 0.05133078247308731, "learning_rate": 0.01, "loss": 2.1088, "step": 6861 }, { "epoch": 0.7052296311517517, "grad_norm": 0.11582443863153458, "learning_rate": 0.01, "loss": 2.1301, "step": 6864 }, { "epoch": 0.7055378608856467, "grad_norm": 0.04287354275584221, "learning_rate": 0.01, "loss": 2.124, "step": 6867 }, { "epoch": 0.7058460906195417, "grad_norm": 0.09393726289272308, "learning_rate": 0.01, "loss": 2.1326, "step": 6870 }, { "epoch": 0.7061543203534367, "grad_norm": 0.1286250203847885, "learning_rate": 0.01, "loss": 2.1292, "step": 6873 }, { "epoch": 0.7064625500873317, "grad_norm": 0.14816388487815857, "learning_rate": 0.01, "loss": 2.1439, "step": 6876 }, { "epoch": 0.7067707798212267, "grad_norm": 0.062444012612104416, "learning_rate": 0.01, "loss": 2.1421, "step": 6879 }, { "epoch": 0.7070790095551217, "grad_norm": 0.053750455379486084, "learning_rate": 0.01, "loss": 2.1185, "step": 6882 }, { "epoch": 0.7073872392890167, "grad_norm": 0.051356710493564606, "learning_rate": 0.01, "loss": 2.1298, "step": 6885 }, { "epoch": 0.7076954690229117, "grad_norm": 0.061504025012254715, "learning_rate": 0.01, "loss": 2.1132, "step": 6888 }, { "epoch": 0.7080036987568067, "grad_norm": 0.056496761739254, "learning_rate": 0.01, "loss": 2.1019, "step": 6891 }, { "epoch": 0.7083119284907017, "grad_norm": 0.048710647970438004, "learning_rate": 0.01, "loss": 2.126, "step": 6894 }, { "epoch": 0.7086201582245968, "grad_norm": 0.06260757148265839, "learning_rate": 0.01, "loss": 2.1534, "step": 6897 }, { "epoch": 0.7089283879584918, "grad_norm": 0.06622278690338135, "learning_rate": 0.01, "loss": 2.1247, "step": 6900 }, { "epoch": 0.7092366176923868, "grad_norm": 0.0810452550649643, "learning_rate": 0.01, "loss": 2.1336, "step": 6903 }, { "epoch": 0.7095448474262818, "grad_norm": 0.04692875221371651, "learning_rate": 0.01, "loss": 2.1096, "step": 6906 }, { "epoch": 0.7098530771601768, "grad_norm": 0.04757360368967056, "learning_rate": 0.01, "loss": 2.1181, "step": 6909 }, { "epoch": 0.7101613068940718, "grad_norm": 0.05597659945487976, "learning_rate": 0.01, "loss": 2.1425, "step": 6912 }, { "epoch": 0.7104695366279667, "grad_norm": 0.051605843007564545, "learning_rate": 0.01, "loss": 2.1118, "step": 6915 }, { "epoch": 0.7107777663618617, "grad_norm": 0.06179991737008095, "learning_rate": 0.01, "loss": 2.1362, "step": 6918 }, { "epoch": 0.7110859960957567, "grad_norm": 0.05455191805958748, "learning_rate": 0.01, "loss": 2.1279, "step": 6921 }, { "epoch": 0.7113942258296517, "grad_norm": 0.11560655385255814, "learning_rate": 0.01, "loss": 2.1316, "step": 6924 }, { "epoch": 0.7117024555635467, "grad_norm": 0.12203246355056763, "learning_rate": 0.01, "loss": 2.1173, "step": 6927 }, { "epoch": 0.7120106852974417, "grad_norm": 0.07024069130420685, "learning_rate": 0.01, "loss": 2.1395, "step": 6930 }, { "epoch": 0.7123189150313367, "grad_norm": 0.04773107171058655, "learning_rate": 0.01, "loss": 2.1455, "step": 6933 }, { "epoch": 0.7126271447652317, "grad_norm": 0.06106821820139885, "learning_rate": 0.01, "loss": 2.1352, "step": 6936 }, { "epoch": 0.7129353744991267, "grad_norm": 0.11438222974538803, "learning_rate": 0.01, "loss": 2.149, "step": 6939 }, { "epoch": 0.7132436042330217, "grad_norm": 0.07224932312965393, "learning_rate": 0.01, "loss": 2.1234, "step": 6942 }, { "epoch": 0.7135518339669167, "grad_norm": 0.06790932267904282, "learning_rate": 0.01, "loss": 2.1222, "step": 6945 }, { "epoch": 0.7138600637008117, "grad_norm": 0.12322958558797836, "learning_rate": 0.01, "loss": 2.106, "step": 6948 }, { "epoch": 0.7141682934347067, "grad_norm": 0.07186157256364822, "learning_rate": 0.01, "loss": 2.1365, "step": 6951 }, { "epoch": 0.7144765231686017, "grad_norm": 0.05366130173206329, "learning_rate": 0.01, "loss": 2.1264, "step": 6954 }, { "epoch": 0.7147847529024967, "grad_norm": 0.06682512164115906, "learning_rate": 0.01, "loss": 2.1163, "step": 6957 }, { "epoch": 0.7150929826363916, "grad_norm": 0.04629479721188545, "learning_rate": 0.01, "loss": 2.126, "step": 6960 }, { "epoch": 0.7154012123702866, "grad_norm": 0.053164754062891006, "learning_rate": 0.01, "loss": 2.1262, "step": 6963 }, { "epoch": 0.7157094421041816, "grad_norm": 0.08918699622154236, "learning_rate": 0.01, "loss": 2.157, "step": 6966 }, { "epoch": 0.7160176718380766, "grad_norm": 0.06226164847612381, "learning_rate": 0.01, "loss": 2.1391, "step": 6969 }, { "epoch": 0.7163259015719716, "grad_norm": 0.08120178431272507, "learning_rate": 0.01, "loss": 2.1118, "step": 6972 }, { "epoch": 0.7166341313058666, "grad_norm": 0.06390135735273361, "learning_rate": 0.01, "loss": 2.1302, "step": 6975 }, { "epoch": 0.7169423610397616, "grad_norm": 0.039068643003702164, "learning_rate": 0.01, "loss": 2.1304, "step": 6978 }, { "epoch": 0.7172505907736566, "grad_norm": 0.05006824806332588, "learning_rate": 0.01, "loss": 2.1352, "step": 6981 }, { "epoch": 0.7175588205075516, "grad_norm": 0.03946538642048836, "learning_rate": 0.01, "loss": 2.1513, "step": 6984 }, { "epoch": 0.7178670502414466, "grad_norm": 0.05072702839970589, "learning_rate": 0.01, "loss": 2.1298, "step": 6987 }, { "epoch": 0.7181752799753416, "grad_norm": 0.06457548588514328, "learning_rate": 0.01, "loss": 2.1276, "step": 6990 }, { "epoch": 0.7184835097092366, "grad_norm": 0.05759236589074135, "learning_rate": 0.01, "loss": 2.1198, "step": 6993 }, { "epoch": 0.7187917394431316, "grad_norm": 0.1151571124792099, "learning_rate": 0.01, "loss": 2.1217, "step": 6996 }, { "epoch": 0.7190999691770266, "grad_norm": 0.04867241531610489, "learning_rate": 0.01, "loss": 2.1343, "step": 6999 }, { "epoch": 0.7194081989109216, "grad_norm": 0.074817955493927, "learning_rate": 0.01, "loss": 2.1474, "step": 7002 }, { "epoch": 0.7197164286448166, "grad_norm": 0.04749060794711113, "learning_rate": 0.01, "loss": 2.1403, "step": 7005 }, { "epoch": 0.7200246583787115, "grad_norm": 0.04965493455529213, "learning_rate": 0.01, "loss": 2.142, "step": 7008 }, { "epoch": 0.7203328881126067, "grad_norm": 0.044914234429597855, "learning_rate": 0.01, "loss": 2.1397, "step": 7011 }, { "epoch": 0.7206411178465016, "grad_norm": 0.06727777421474457, "learning_rate": 0.01, "loss": 2.1443, "step": 7014 }, { "epoch": 0.7209493475803966, "grad_norm": 0.10670837014913559, "learning_rate": 0.01, "loss": 2.1316, "step": 7017 }, { "epoch": 0.7212575773142916, "grad_norm": 0.05047740787267685, "learning_rate": 0.01, "loss": 2.1268, "step": 7020 }, { "epoch": 0.7215658070481866, "grad_norm": 0.055116791278123856, "learning_rate": 0.01, "loss": 2.1194, "step": 7023 }, { "epoch": 0.7218740367820816, "grad_norm": 0.04873311519622803, "learning_rate": 0.01, "loss": 2.1122, "step": 7026 }, { "epoch": 0.7221822665159766, "grad_norm": 0.0893159881234169, "learning_rate": 0.01, "loss": 2.1413, "step": 7029 }, { "epoch": 0.7224904962498716, "grad_norm": 0.07278893142938614, "learning_rate": 0.01, "loss": 2.1394, "step": 7032 }, { "epoch": 0.7227987259837666, "grad_norm": 0.09431196749210358, "learning_rate": 0.01, "loss": 2.1489, "step": 7035 }, { "epoch": 0.7231069557176616, "grad_norm": 0.03588537499308586, "learning_rate": 0.01, "loss": 2.1585, "step": 7038 }, { "epoch": 0.7234151854515566, "grad_norm": 0.044003136456012726, "learning_rate": 0.01, "loss": 2.1442, "step": 7041 }, { "epoch": 0.7237234151854516, "grad_norm": 0.10805044323205948, "learning_rate": 0.01, "loss": 2.127, "step": 7044 }, { "epoch": 0.7240316449193466, "grad_norm": 0.06328746676445007, "learning_rate": 0.01, "loss": 2.1166, "step": 7047 }, { "epoch": 0.7243398746532416, "grad_norm": 0.08782347291707993, "learning_rate": 0.01, "loss": 2.1474, "step": 7050 }, { "epoch": 0.7246481043871366, "grad_norm": 0.06585227698087692, "learning_rate": 0.01, "loss": 2.1228, "step": 7053 }, { "epoch": 0.7249563341210316, "grad_norm": 0.06324558705091476, "learning_rate": 0.01, "loss": 2.1313, "step": 7056 }, { "epoch": 0.7252645638549265, "grad_norm": 0.057287219911813736, "learning_rate": 0.01, "loss": 2.1241, "step": 7059 }, { "epoch": 0.7255727935888215, "grad_norm": 0.07684747129678726, "learning_rate": 0.01, "loss": 2.1299, "step": 7062 }, { "epoch": 0.7258810233227165, "grad_norm": 0.10347555577754974, "learning_rate": 0.01, "loss": 2.12, "step": 7065 }, { "epoch": 0.7261892530566115, "grad_norm": 0.06019530072808266, "learning_rate": 0.01, "loss": 2.138, "step": 7068 }, { "epoch": 0.7264974827905065, "grad_norm": 0.04816723242402077, "learning_rate": 0.01, "loss": 2.1161, "step": 7071 }, { "epoch": 0.7268057125244015, "grad_norm": 0.05839864909648895, "learning_rate": 0.01, "loss": 2.136, "step": 7074 }, { "epoch": 0.7271139422582965, "grad_norm": 0.061795271933078766, "learning_rate": 0.01, "loss": 2.1315, "step": 7077 }, { "epoch": 0.7274221719921915, "grad_norm": 0.05736471712589264, "learning_rate": 0.01, "loss": 2.1403, "step": 7080 }, { "epoch": 0.7277304017260865, "grad_norm": 0.059238459914922714, "learning_rate": 0.01, "loss": 2.1101, "step": 7083 }, { "epoch": 0.7280386314599815, "grad_norm": 0.10844148695468903, "learning_rate": 0.01, "loss": 2.1454, "step": 7086 }, { "epoch": 0.7283468611938765, "grad_norm": 0.047568898648023605, "learning_rate": 0.01, "loss": 2.1183, "step": 7089 }, { "epoch": 0.7286550909277715, "grad_norm": 0.05178900063037872, "learning_rate": 0.01, "loss": 2.1346, "step": 7092 }, { "epoch": 0.7289633206616665, "grad_norm": 0.04113532230257988, "learning_rate": 0.01, "loss": 2.0915, "step": 7095 }, { "epoch": 0.7292715503955615, "grad_norm": 0.10488615930080414, "learning_rate": 0.01, "loss": 2.1239, "step": 7098 }, { "epoch": 0.7295797801294565, "grad_norm": 0.13013161718845367, "learning_rate": 0.01, "loss": 2.1251, "step": 7101 }, { "epoch": 0.7298880098633515, "grad_norm": 0.10956915467977524, "learning_rate": 0.01, "loss": 2.1113, "step": 7104 }, { "epoch": 0.7301962395972464, "grad_norm": 0.06996689736843109, "learning_rate": 0.01, "loss": 2.118, "step": 7107 }, { "epoch": 0.7305044693311414, "grad_norm": 0.07773365080356598, "learning_rate": 0.01, "loss": 2.1144, "step": 7110 }, { "epoch": 0.7308126990650364, "grad_norm": 0.06922838091850281, "learning_rate": 0.01, "loss": 2.1148, "step": 7113 }, { "epoch": 0.7311209287989314, "grad_norm": 0.08941454440355301, "learning_rate": 0.01, "loss": 2.1493, "step": 7116 }, { "epoch": 0.7314291585328264, "grad_norm": 0.04264171048998833, "learning_rate": 0.01, "loss": 2.136, "step": 7119 }, { "epoch": 0.7317373882667214, "grad_norm": 0.04473461955785751, "learning_rate": 0.01, "loss": 2.1294, "step": 7122 }, { "epoch": 0.7320456180006164, "grad_norm": 0.0396125465631485, "learning_rate": 0.01, "loss": 2.1439, "step": 7125 }, { "epoch": 0.7323538477345115, "grad_norm": 0.04613679647445679, "learning_rate": 0.01, "loss": 2.1503, "step": 7128 }, { "epoch": 0.7326620774684065, "grad_norm": 0.04897918924689293, "learning_rate": 0.01, "loss": 2.1214, "step": 7131 }, { "epoch": 0.7329703072023015, "grad_norm": 0.05057375133037567, "learning_rate": 0.01, "loss": 2.1112, "step": 7134 }, { "epoch": 0.7332785369361965, "grad_norm": 0.05711055174469948, "learning_rate": 0.01, "loss": 2.102, "step": 7137 }, { "epoch": 0.7335867666700915, "grad_norm": 0.08658434450626373, "learning_rate": 0.01, "loss": 2.1574, "step": 7140 }, { "epoch": 0.7338949964039865, "grad_norm": 0.07044188678264618, "learning_rate": 0.01, "loss": 2.1037, "step": 7143 }, { "epoch": 0.7342032261378815, "grad_norm": 0.03941315785050392, "learning_rate": 0.01, "loss": 2.1369, "step": 7146 }, { "epoch": 0.7345114558717765, "grad_norm": 0.04527783393859863, "learning_rate": 0.01, "loss": 2.1212, "step": 7149 }, { "epoch": 0.7348196856056715, "grad_norm": 0.07909847050905228, "learning_rate": 0.01, "loss": 2.1316, "step": 7152 }, { "epoch": 0.7351279153395665, "grad_norm": 0.12793006002902985, "learning_rate": 0.01, "loss": 2.1254, "step": 7155 }, { "epoch": 0.7354361450734614, "grad_norm": 0.0639350563287735, "learning_rate": 0.01, "loss": 2.1319, "step": 7158 }, { "epoch": 0.7357443748073564, "grad_norm": 0.0342305451631546, "learning_rate": 0.01, "loss": 2.1386, "step": 7161 }, { "epoch": 0.7360526045412514, "grad_norm": 0.049001939594745636, "learning_rate": 0.01, "loss": 2.1485, "step": 7164 }, { "epoch": 0.7363608342751464, "grad_norm": 0.047717638313770294, "learning_rate": 0.01, "loss": 2.1368, "step": 7167 }, { "epoch": 0.7366690640090414, "grad_norm": 0.04402822256088257, "learning_rate": 0.01, "loss": 2.1162, "step": 7170 }, { "epoch": 0.7369772937429364, "grad_norm": 0.06922505795955658, "learning_rate": 0.01, "loss": 2.1279, "step": 7173 }, { "epoch": 0.7372855234768314, "grad_norm": 0.06231709569692612, "learning_rate": 0.01, "loss": 2.0946, "step": 7176 }, { "epoch": 0.7375937532107264, "grad_norm": 0.11480400711297989, "learning_rate": 0.01, "loss": 2.146, "step": 7179 }, { "epoch": 0.7379019829446214, "grad_norm": 0.05144179239869118, "learning_rate": 0.01, "loss": 2.1128, "step": 7182 }, { "epoch": 0.7382102126785164, "grad_norm": 0.05130591616034508, "learning_rate": 0.01, "loss": 2.0964, "step": 7185 }, { "epoch": 0.7385184424124114, "grad_norm": 0.0549122579395771, "learning_rate": 0.01, "loss": 2.165, "step": 7188 }, { "epoch": 0.7388266721463064, "grad_norm": 0.1378844678401947, "learning_rate": 0.01, "loss": 2.1367, "step": 7191 }, { "epoch": 0.7391349018802014, "grad_norm": 0.06231486052274704, "learning_rate": 0.01, "loss": 2.1341, "step": 7194 }, { "epoch": 0.7394431316140964, "grad_norm": 0.10189559310674667, "learning_rate": 0.01, "loss": 2.1161, "step": 7197 }, { "epoch": 0.7397513613479914, "grad_norm": 0.053364284336566925, "learning_rate": 0.01, "loss": 2.1043, "step": 7200 }, { "epoch": 0.7400595910818863, "grad_norm": 0.046057943254709244, "learning_rate": 0.01, "loss": 2.1011, "step": 7203 }, { "epoch": 0.7403678208157813, "grad_norm": 0.04084615036845207, "learning_rate": 0.01, "loss": 2.1253, "step": 7206 }, { "epoch": 0.7406760505496763, "grad_norm": 0.04594961181282997, "learning_rate": 0.01, "loss": 2.1228, "step": 7209 }, { "epoch": 0.7409842802835713, "grad_norm": 0.06608622521162033, "learning_rate": 0.01, "loss": 2.1188, "step": 7212 }, { "epoch": 0.7412925100174663, "grad_norm": 0.125398188829422, "learning_rate": 0.01, "loss": 2.1057, "step": 7215 }, { "epoch": 0.7416007397513613, "grad_norm": 0.08068963885307312, "learning_rate": 0.01, "loss": 2.0947, "step": 7218 }, { "epoch": 0.7419089694852563, "grad_norm": 0.07993921637535095, "learning_rate": 0.01, "loss": 2.1214, "step": 7221 }, { "epoch": 0.7422171992191513, "grad_norm": 0.04969675466418266, "learning_rate": 0.01, "loss": 2.1099, "step": 7224 }, { "epoch": 0.7425254289530463, "grad_norm": 0.054677605628967285, "learning_rate": 0.01, "loss": 2.1229, "step": 7227 }, { "epoch": 0.7428336586869413, "grad_norm": 0.04562999680638313, "learning_rate": 0.01, "loss": 2.1409, "step": 7230 }, { "epoch": 0.7431418884208363, "grad_norm": 0.07618910074234009, "learning_rate": 0.01, "loss": 2.0924, "step": 7233 }, { "epoch": 0.7434501181547313, "grad_norm": 0.14368098974227905, "learning_rate": 0.01, "loss": 2.1348, "step": 7236 }, { "epoch": 0.7437583478886263, "grad_norm": 0.05517590045928955, "learning_rate": 0.01, "loss": 2.116, "step": 7239 }, { "epoch": 0.7440665776225214, "grad_norm": 0.17316390573978424, "learning_rate": 0.01, "loss": 2.1363, "step": 7242 }, { "epoch": 0.7443748073564164, "grad_norm": 0.15268415212631226, "learning_rate": 0.01, "loss": 2.1033, "step": 7245 }, { "epoch": 0.7446830370903114, "grad_norm": 0.06212317943572998, "learning_rate": 0.01, "loss": 2.0971, "step": 7248 }, { "epoch": 0.7449912668242064, "grad_norm": 0.04282272607088089, "learning_rate": 0.01, "loss": 2.1434, "step": 7251 }, { "epoch": 0.7452994965581013, "grad_norm": 0.04305952787399292, "learning_rate": 0.01, "loss": 2.1406, "step": 7254 }, { "epoch": 0.7456077262919963, "grad_norm": 0.048668697476387024, "learning_rate": 0.01, "loss": 2.1303, "step": 7257 }, { "epoch": 0.7459159560258913, "grad_norm": 0.05524542182683945, "learning_rate": 0.01, "loss": 2.1331, "step": 7260 }, { "epoch": 0.7462241857597863, "grad_norm": 0.0438026525080204, "learning_rate": 0.01, "loss": 2.109, "step": 7263 }, { "epoch": 0.7465324154936813, "grad_norm": 0.08154566586017609, "learning_rate": 0.01, "loss": 2.1053, "step": 7266 }, { "epoch": 0.7468406452275763, "grad_norm": 0.11754357814788818, "learning_rate": 0.01, "loss": 2.1298, "step": 7269 }, { "epoch": 0.7471488749614713, "grad_norm": 0.06593465805053711, "learning_rate": 0.01, "loss": 2.1323, "step": 7272 }, { "epoch": 0.7474571046953663, "grad_norm": 0.08065393567085266, "learning_rate": 0.01, "loss": 2.1297, "step": 7275 }, { "epoch": 0.7477653344292613, "grad_norm": 0.10624121129512787, "learning_rate": 0.01, "loss": 2.1175, "step": 7278 }, { "epoch": 0.7480735641631563, "grad_norm": 0.06357972323894501, "learning_rate": 0.01, "loss": 2.164, "step": 7281 }, { "epoch": 0.7483817938970513, "grad_norm": 0.03753754869103432, "learning_rate": 0.01, "loss": 2.1519, "step": 7284 }, { "epoch": 0.7486900236309463, "grad_norm": 0.04756931588053703, "learning_rate": 0.01, "loss": 2.0901, "step": 7287 }, { "epoch": 0.7489982533648413, "grad_norm": 0.0494108609855175, "learning_rate": 0.01, "loss": 2.1474, "step": 7290 }, { "epoch": 0.7493064830987363, "grad_norm": 0.063727006316185, "learning_rate": 0.01, "loss": 2.1425, "step": 7293 }, { "epoch": 0.7496147128326313, "grad_norm": 0.06327082961797714, "learning_rate": 0.01, "loss": 2.1346, "step": 7296 }, { "epoch": 0.7499229425665263, "grad_norm": 0.10383486747741699, "learning_rate": 0.01, "loss": 2.1245, "step": 7299 }, { "epoch": 0.7502311723004212, "grad_norm": 0.10473886877298355, "learning_rate": 0.01, "loss": 2.1302, "step": 7302 }, { "epoch": 0.7505394020343162, "grad_norm": 0.04905236139893532, "learning_rate": 0.01, "loss": 2.119, "step": 7305 }, { "epoch": 0.7508476317682112, "grad_norm": 0.04571664705872536, "learning_rate": 0.01, "loss": 2.1505, "step": 7308 }, { "epoch": 0.7511558615021062, "grad_norm": 0.06305412203073502, "learning_rate": 0.01, "loss": 2.1389, "step": 7311 }, { "epoch": 0.7514640912360012, "grad_norm": 0.05825283005833626, "learning_rate": 0.01, "loss": 2.1361, "step": 7314 }, { "epoch": 0.7517723209698962, "grad_norm": 0.059476301074028015, "learning_rate": 0.01, "loss": 2.1191, "step": 7317 }, { "epoch": 0.7520805507037912, "grad_norm": 0.042396873235702515, "learning_rate": 0.01, "loss": 2.1309, "step": 7320 }, { "epoch": 0.7523887804376862, "grad_norm": 0.04611228406429291, "learning_rate": 0.01, "loss": 2.1438, "step": 7323 }, { "epoch": 0.7526970101715812, "grad_norm": 0.09147686511278152, "learning_rate": 0.01, "loss": 2.1288, "step": 7326 }, { "epoch": 0.7530052399054762, "grad_norm": 0.08085332810878754, "learning_rate": 0.01, "loss": 2.1427, "step": 7329 }, { "epoch": 0.7533134696393712, "grad_norm": 0.03873496130108833, "learning_rate": 0.01, "loss": 2.1257, "step": 7332 }, { "epoch": 0.7536216993732662, "grad_norm": 0.05457824096083641, "learning_rate": 0.01, "loss": 2.1373, "step": 7335 }, { "epoch": 0.7539299291071612, "grad_norm": 0.049249522387981415, "learning_rate": 0.01, "loss": 2.1185, "step": 7338 }, { "epoch": 0.7542381588410562, "grad_norm": 0.07082841545343399, "learning_rate": 0.01, "loss": 2.1157, "step": 7341 }, { "epoch": 0.7545463885749512, "grad_norm": 0.046108178794384, "learning_rate": 0.01, "loss": 2.1238, "step": 7344 }, { "epoch": 0.7548546183088461, "grad_norm": 0.05572620406746864, "learning_rate": 0.01, "loss": 2.1445, "step": 7347 }, { "epoch": 0.7551628480427411, "grad_norm": 0.1091703474521637, "learning_rate": 0.01, "loss": 2.1281, "step": 7350 }, { "epoch": 0.7554710777766361, "grad_norm": 0.09372757375240326, "learning_rate": 0.01, "loss": 2.1231, "step": 7353 }, { "epoch": 0.7557793075105311, "grad_norm": 0.0482059009373188, "learning_rate": 0.01, "loss": 2.1003, "step": 7356 }, { "epoch": 0.7560875372444262, "grad_norm": 0.041941821575164795, "learning_rate": 0.01, "loss": 2.1382, "step": 7359 }, { "epoch": 0.7563957669783212, "grad_norm": 0.07122782617807388, "learning_rate": 0.01, "loss": 2.1419, "step": 7362 }, { "epoch": 0.7567039967122162, "grad_norm": 0.06854265183210373, "learning_rate": 0.01, "loss": 2.1328, "step": 7365 }, { "epoch": 0.7570122264461112, "grad_norm": 0.10073423385620117, "learning_rate": 0.01, "loss": 2.1322, "step": 7368 }, { "epoch": 0.7573204561800062, "grad_norm": 0.038869407027959824, "learning_rate": 0.01, "loss": 2.1273, "step": 7371 }, { "epoch": 0.7576286859139012, "grad_norm": 0.09483812749385834, "learning_rate": 0.01, "loss": 2.1465, "step": 7374 }, { "epoch": 0.7579369156477962, "grad_norm": 0.07226487994194031, "learning_rate": 0.01, "loss": 2.1386, "step": 7377 }, { "epoch": 0.7582451453816912, "grad_norm": 0.05041668191552162, "learning_rate": 0.01, "loss": 2.1249, "step": 7380 }, { "epoch": 0.7585533751155862, "grad_norm": 0.03839525580406189, "learning_rate": 0.01, "loss": 2.1125, "step": 7383 }, { "epoch": 0.7588616048494812, "grad_norm": 0.047746479511260986, "learning_rate": 0.01, "loss": 2.1027, "step": 7386 }, { "epoch": 0.7591698345833762, "grad_norm": 0.05524810031056404, "learning_rate": 0.01, "loss": 2.166, "step": 7389 }, { "epoch": 0.7594780643172712, "grad_norm": 0.050045181065797806, "learning_rate": 0.01, "loss": 2.1411, "step": 7392 }, { "epoch": 0.7597862940511662, "grad_norm": 0.09187906980514526, "learning_rate": 0.01, "loss": 2.13, "step": 7395 }, { "epoch": 0.7600945237850611, "grad_norm": 0.15085643529891968, "learning_rate": 0.01, "loss": 2.1198, "step": 7398 }, { "epoch": 0.7604027535189561, "grad_norm": 0.05295104160904884, "learning_rate": 0.01, "loss": 2.1067, "step": 7401 }, { "epoch": 0.7607109832528511, "grad_norm": 0.03696104511618614, "learning_rate": 0.01, "loss": 2.1159, "step": 7404 }, { "epoch": 0.7610192129867461, "grad_norm": 0.04209265485405922, "learning_rate": 0.01, "loss": 2.1243, "step": 7407 }, { "epoch": 0.7613274427206411, "grad_norm": 0.056943077594041824, "learning_rate": 0.01, "loss": 2.1038, "step": 7410 }, { "epoch": 0.7616356724545361, "grad_norm": 0.12749402225017548, "learning_rate": 0.01, "loss": 2.1087, "step": 7413 }, { "epoch": 0.7619439021884311, "grad_norm": 0.09119253605604172, "learning_rate": 0.01, "loss": 2.1252, "step": 7416 }, { "epoch": 0.7622521319223261, "grad_norm": 0.04251190647482872, "learning_rate": 0.01, "loss": 2.1384, "step": 7419 }, { "epoch": 0.7625603616562211, "grad_norm": 0.04010685533285141, "learning_rate": 0.01, "loss": 2.1449, "step": 7422 }, { "epoch": 0.7628685913901161, "grad_norm": 0.05524475499987602, "learning_rate": 0.01, "loss": 2.0841, "step": 7425 }, { "epoch": 0.7631768211240111, "grad_norm": 0.10250036418437958, "learning_rate": 0.01, "loss": 2.0827, "step": 7428 }, { "epoch": 0.7634850508579061, "grad_norm": 0.0748668685555458, "learning_rate": 0.01, "loss": 2.128, "step": 7431 }, { "epoch": 0.7637932805918011, "grad_norm": 0.08616036176681519, "learning_rate": 0.01, "loss": 2.1087, "step": 7434 }, { "epoch": 0.7641015103256961, "grad_norm": 0.09491308033466339, "learning_rate": 0.01, "loss": 2.1247, "step": 7437 }, { "epoch": 0.7644097400595911, "grad_norm": 0.08575759083032608, "learning_rate": 0.01, "loss": 2.1419, "step": 7440 }, { "epoch": 0.764717969793486, "grad_norm": 0.04314613714814186, "learning_rate": 0.01, "loss": 2.1462, "step": 7443 }, { "epoch": 0.765026199527381, "grad_norm": 0.035719119012355804, "learning_rate": 0.01, "loss": 2.1337, "step": 7446 }, { "epoch": 0.765334429261276, "grad_norm": 0.04597650095820427, "learning_rate": 0.01, "loss": 2.1258, "step": 7449 }, { "epoch": 0.765642658995171, "grad_norm": 0.10039210319519043, "learning_rate": 0.01, "loss": 2.1238, "step": 7452 }, { "epoch": 0.765950888729066, "grad_norm": 0.07157409191131592, "learning_rate": 0.01, "loss": 2.1349, "step": 7455 }, { "epoch": 0.766259118462961, "grad_norm": 0.09058292210102081, "learning_rate": 0.01, "loss": 2.1232, "step": 7458 }, { "epoch": 0.766567348196856, "grad_norm": 0.06009940057992935, "learning_rate": 0.01, "loss": 2.1386, "step": 7461 }, { "epoch": 0.766875577930751, "grad_norm": 0.1165439561009407, "learning_rate": 0.01, "loss": 2.1293, "step": 7464 }, { "epoch": 0.767183807664646, "grad_norm": 0.06138407811522484, "learning_rate": 0.01, "loss": 2.1066, "step": 7467 }, { "epoch": 0.767492037398541, "grad_norm": 0.06058945506811142, "learning_rate": 0.01, "loss": 2.1044, "step": 7470 }, { "epoch": 0.767800267132436, "grad_norm": 0.06741827726364136, "learning_rate": 0.01, "loss": 2.1191, "step": 7473 }, { "epoch": 0.7681084968663311, "grad_norm": 0.047926925122737885, "learning_rate": 0.01, "loss": 2.1333, "step": 7476 }, { "epoch": 0.7684167266002261, "grad_norm": 0.06450969725847244, "learning_rate": 0.01, "loss": 2.1061, "step": 7479 }, { "epoch": 0.7687249563341211, "grad_norm": 0.11133641749620438, "learning_rate": 0.01, "loss": 2.1129, "step": 7482 }, { "epoch": 0.7690331860680161, "grad_norm": 0.049795158207416534, "learning_rate": 0.01, "loss": 2.136, "step": 7485 }, { "epoch": 0.7693414158019111, "grad_norm": 0.06083859130740166, "learning_rate": 0.01, "loss": 2.1459, "step": 7488 }, { "epoch": 0.7696496455358061, "grad_norm": 0.04686833918094635, "learning_rate": 0.01, "loss": 2.1073, "step": 7491 }, { "epoch": 0.769957875269701, "grad_norm": 0.05475611612200737, "learning_rate": 0.01, "loss": 2.1167, "step": 7494 }, { "epoch": 0.770266105003596, "grad_norm": 0.04683786630630493, "learning_rate": 0.01, "loss": 2.1491, "step": 7497 }, { "epoch": 0.770574334737491, "grad_norm": 0.10841275751590729, "learning_rate": 0.01, "loss": 2.0967, "step": 7500 }, { "epoch": 0.770882564471386, "grad_norm": 0.09716581553220749, "learning_rate": 0.01, "loss": 2.1133, "step": 7503 }, { "epoch": 0.771190794205281, "grad_norm": 0.04913085699081421, "learning_rate": 0.01, "loss": 2.1401, "step": 7506 }, { "epoch": 0.771499023939176, "grad_norm": 0.04710682854056358, "learning_rate": 0.01, "loss": 2.109, "step": 7509 }, { "epoch": 0.771807253673071, "grad_norm": 0.054945673793554306, "learning_rate": 0.01, "loss": 2.1169, "step": 7512 }, { "epoch": 0.772115483406966, "grad_norm": 0.04265155643224716, "learning_rate": 0.01, "loss": 2.1156, "step": 7515 }, { "epoch": 0.772423713140861, "grad_norm": 0.03544042259454727, "learning_rate": 0.01, "loss": 2.1172, "step": 7518 }, { "epoch": 0.772731942874756, "grad_norm": 0.05048484355211258, "learning_rate": 0.01, "loss": 2.1015, "step": 7521 }, { "epoch": 0.773040172608651, "grad_norm": 0.14160272479057312, "learning_rate": 0.01, "loss": 2.1475, "step": 7524 }, { "epoch": 0.773348402342546, "grad_norm": 0.08693049848079681, "learning_rate": 0.01, "loss": 2.1266, "step": 7527 }, { "epoch": 0.773656632076441, "grad_norm": 0.06437800824642181, "learning_rate": 0.01, "loss": 2.1273, "step": 7530 }, { "epoch": 0.773964861810336, "grad_norm": 0.04450656846165657, "learning_rate": 0.01, "loss": 2.1192, "step": 7533 }, { "epoch": 0.774273091544231, "grad_norm": 0.05369933694601059, "learning_rate": 0.01, "loss": 2.1264, "step": 7536 }, { "epoch": 0.774581321278126, "grad_norm": 0.04080953076481819, "learning_rate": 0.01, "loss": 2.1319, "step": 7539 }, { "epoch": 0.774889551012021, "grad_norm": 0.03433745354413986, "learning_rate": 0.01, "loss": 2.1024, "step": 7542 }, { "epoch": 0.7751977807459159, "grad_norm": 0.1574896275997162, "learning_rate": 0.01, "loss": 2.1182, "step": 7545 }, { "epoch": 0.7755060104798109, "grad_norm": 0.1207810789346695, "learning_rate": 0.01, "loss": 2.1052, "step": 7548 }, { "epoch": 0.7758142402137059, "grad_norm": 0.07270894944667816, "learning_rate": 0.01, "loss": 2.1331, "step": 7551 }, { "epoch": 0.7761224699476009, "grad_norm": 0.07062831521034241, "learning_rate": 0.01, "loss": 2.099, "step": 7554 }, { "epoch": 0.7764306996814959, "grad_norm": 0.04142964631319046, "learning_rate": 0.01, "loss": 2.1192, "step": 7557 }, { "epoch": 0.7767389294153909, "grad_norm": 0.04645151272416115, "learning_rate": 0.01, "loss": 2.1117, "step": 7560 }, { "epoch": 0.7770471591492859, "grad_norm": 0.046251073479652405, "learning_rate": 0.01, "loss": 2.1399, "step": 7563 }, { "epoch": 0.7773553888831809, "grad_norm": 0.07185769826173782, "learning_rate": 0.01, "loss": 2.1261, "step": 7566 }, { "epoch": 0.7776636186170759, "grad_norm": 0.045216575264930725, "learning_rate": 0.01, "loss": 2.1302, "step": 7569 }, { "epoch": 0.7779718483509709, "grad_norm": 0.04923580586910248, "learning_rate": 0.01, "loss": 2.1482, "step": 7572 }, { "epoch": 0.7782800780848659, "grad_norm": 0.06434139609336853, "learning_rate": 0.01, "loss": 2.1325, "step": 7575 }, { "epoch": 0.7785883078187609, "grad_norm": 0.11186740547418594, "learning_rate": 0.01, "loss": 2.1168, "step": 7578 }, { "epoch": 0.7788965375526559, "grad_norm": 0.06694278120994568, "learning_rate": 0.01, "loss": 2.15, "step": 7581 }, { "epoch": 0.7792047672865509, "grad_norm": 0.05431769788265228, "learning_rate": 0.01, "loss": 2.1156, "step": 7584 }, { "epoch": 0.7795129970204459, "grad_norm": 0.05853963643312454, "learning_rate": 0.01, "loss": 2.1145, "step": 7587 }, { "epoch": 0.779821226754341, "grad_norm": 0.04059399664402008, "learning_rate": 0.01, "loss": 2.1373, "step": 7590 }, { "epoch": 0.780129456488236, "grad_norm": 0.06444236636161804, "learning_rate": 0.01, "loss": 2.1167, "step": 7593 }, { "epoch": 0.7804376862221309, "grad_norm": 0.09885245561599731, "learning_rate": 0.01, "loss": 2.1146, "step": 7596 }, { "epoch": 0.7807459159560259, "grad_norm": 0.08536794036626816, "learning_rate": 0.01, "loss": 2.1282, "step": 7599 }, { "epoch": 0.7810541456899209, "grad_norm": 0.04299011081457138, "learning_rate": 0.01, "loss": 2.103, "step": 7602 }, { "epoch": 0.7813623754238159, "grad_norm": 0.060757700353860855, "learning_rate": 0.01, "loss": 2.0923, "step": 7605 }, { "epoch": 0.7816706051577109, "grad_norm": 0.037401244044303894, "learning_rate": 0.01, "loss": 2.1343, "step": 7608 }, { "epoch": 0.7819788348916059, "grad_norm": 0.12264932692050934, "learning_rate": 0.01, "loss": 2.1193, "step": 7611 }, { "epoch": 0.7822870646255009, "grad_norm": 0.052691470831632614, "learning_rate": 0.01, "loss": 2.1097, "step": 7614 }, { "epoch": 0.7825952943593959, "grad_norm": 0.05509025603532791, "learning_rate": 0.01, "loss": 2.1208, "step": 7617 }, { "epoch": 0.7829035240932909, "grad_norm": 0.10352631658315659, "learning_rate": 0.01, "loss": 2.1277, "step": 7620 }, { "epoch": 0.7832117538271859, "grad_norm": 0.05865751951932907, "learning_rate": 0.01, "loss": 2.138, "step": 7623 }, { "epoch": 0.7835199835610809, "grad_norm": 0.09445837885141373, "learning_rate": 0.01, "loss": 2.1131, "step": 7626 }, { "epoch": 0.7838282132949759, "grad_norm": 0.11066542565822601, "learning_rate": 0.01, "loss": 2.1315, "step": 7629 }, { "epoch": 0.7841364430288709, "grad_norm": 0.05489170923829079, "learning_rate": 0.01, "loss": 2.1264, "step": 7632 }, { "epoch": 0.7844446727627659, "grad_norm": 0.06804061681032181, "learning_rate": 0.01, "loss": 2.1491, "step": 7635 }, { "epoch": 0.7847529024966609, "grad_norm": 0.07411237061023712, "learning_rate": 0.01, "loss": 2.126, "step": 7638 }, { "epoch": 0.7850611322305558, "grad_norm": 0.050356972962617874, "learning_rate": 0.01, "loss": 2.1237, "step": 7641 }, { "epoch": 0.7853693619644508, "grad_norm": 0.06125912442803383, "learning_rate": 0.01, "loss": 2.1328, "step": 7644 }, { "epoch": 0.7856775916983458, "grad_norm": 0.05983618274331093, "learning_rate": 0.01, "loss": 2.1152, "step": 7647 }, { "epoch": 0.7859858214322408, "grad_norm": 0.04065684601664543, "learning_rate": 0.01, "loss": 2.1213, "step": 7650 }, { "epoch": 0.7862940511661358, "grad_norm": 0.05535745993256569, "learning_rate": 0.01, "loss": 2.1106, "step": 7653 }, { "epoch": 0.7866022809000308, "grad_norm": 0.09727519005537033, "learning_rate": 0.01, "loss": 2.1202, "step": 7656 }, { "epoch": 0.7869105106339258, "grad_norm": 0.07764584571123123, "learning_rate": 0.01, "loss": 2.1181, "step": 7659 }, { "epoch": 0.7872187403678208, "grad_norm": 0.04933121055364609, "learning_rate": 0.01, "loss": 2.1217, "step": 7662 }, { "epoch": 0.7875269701017158, "grad_norm": 0.12199501693248749, "learning_rate": 0.01, "loss": 2.1412, "step": 7665 }, { "epoch": 0.7878351998356108, "grad_norm": 0.14431309700012207, "learning_rate": 0.01, "loss": 2.1249, "step": 7668 }, { "epoch": 0.7881434295695058, "grad_norm": 0.07583998888731003, "learning_rate": 0.01, "loss": 2.139, "step": 7671 }, { "epoch": 0.7884516593034008, "grad_norm": 0.10426465421915054, "learning_rate": 0.01, "loss": 2.1053, "step": 7674 }, { "epoch": 0.7887598890372958, "grad_norm": 0.06411170959472656, "learning_rate": 0.01, "loss": 2.1105, "step": 7677 }, { "epoch": 0.7890681187711908, "grad_norm": 0.07436025142669678, "learning_rate": 0.01, "loss": 2.1301, "step": 7680 }, { "epoch": 0.7893763485050858, "grad_norm": 0.10409426689147949, "learning_rate": 0.01, "loss": 2.1319, "step": 7683 }, { "epoch": 0.7896845782389807, "grad_norm": 0.05232664570212364, "learning_rate": 0.01, "loss": 2.1458, "step": 7686 }, { "epoch": 0.7899928079728757, "grad_norm": 0.06705309450626373, "learning_rate": 0.01, "loss": 2.1231, "step": 7689 }, { "epoch": 0.7903010377067707, "grad_norm": 0.04422546550631523, "learning_rate": 0.01, "loss": 2.0836, "step": 7692 }, { "epoch": 0.7906092674406657, "grad_norm": 0.04316714033484459, "learning_rate": 0.01, "loss": 2.1117, "step": 7695 }, { "epoch": 0.7909174971745607, "grad_norm": 0.058282140642404556, "learning_rate": 0.01, "loss": 2.0904, "step": 7698 }, { "epoch": 0.7912257269084557, "grad_norm": 0.07676571607589722, "learning_rate": 0.01, "loss": 2.1402, "step": 7701 }, { "epoch": 0.7915339566423507, "grad_norm": 0.07258665561676025, "learning_rate": 0.01, "loss": 2.1458, "step": 7704 }, { "epoch": 0.7918421863762458, "grad_norm": 0.04850257560610771, "learning_rate": 0.01, "loss": 2.0886, "step": 7707 }, { "epoch": 0.7921504161101408, "grad_norm": 0.05658482015132904, "learning_rate": 0.01, "loss": 2.1174, "step": 7710 }, { "epoch": 0.7924586458440358, "grad_norm": 0.06475166231393814, "learning_rate": 0.01, "loss": 2.0995, "step": 7713 }, { "epoch": 0.7927668755779308, "grad_norm": 0.10428962856531143, "learning_rate": 0.01, "loss": 2.109, "step": 7716 }, { "epoch": 0.7930751053118258, "grad_norm": 0.04227283224463463, "learning_rate": 0.01, "loss": 2.1124, "step": 7719 }, { "epoch": 0.7933833350457208, "grad_norm": 0.0594823881983757, "learning_rate": 0.01, "loss": 2.0944, "step": 7722 }, { "epoch": 0.7936915647796158, "grad_norm": 0.08695527911186218, "learning_rate": 0.01, "loss": 2.1077, "step": 7725 }, { "epoch": 0.7939997945135108, "grad_norm": 0.06003952398896217, "learning_rate": 0.01, "loss": 2.088, "step": 7728 }, { "epoch": 0.7943080242474058, "grad_norm": 0.058509476482868195, "learning_rate": 0.01, "loss": 2.1471, "step": 7731 }, { "epoch": 0.7946162539813008, "grad_norm": 0.048057131469249725, "learning_rate": 0.01, "loss": 2.1252, "step": 7734 }, { "epoch": 0.7949244837151958, "grad_norm": 0.11144626140594482, "learning_rate": 0.01, "loss": 2.1209, "step": 7737 }, { "epoch": 0.7952327134490907, "grad_norm": 0.041008081287145615, "learning_rate": 0.01, "loss": 2.1139, "step": 7740 }, { "epoch": 0.7955409431829857, "grad_norm": 0.04088988155126572, "learning_rate": 0.01, "loss": 2.0927, "step": 7743 }, { "epoch": 0.7958491729168807, "grad_norm": 0.1495555192232132, "learning_rate": 0.01, "loss": 2.0977, "step": 7746 }, { "epoch": 0.7961574026507757, "grad_norm": 0.042645204812288284, "learning_rate": 0.01, "loss": 2.1021, "step": 7749 }, { "epoch": 0.7964656323846707, "grad_norm": 0.04671596363186836, "learning_rate": 0.01, "loss": 2.1015, "step": 7752 }, { "epoch": 0.7967738621185657, "grad_norm": 0.07249152660369873, "learning_rate": 0.01, "loss": 2.1278, "step": 7755 }, { "epoch": 0.7970820918524607, "grad_norm": 0.05848756060004234, "learning_rate": 0.01, "loss": 2.1168, "step": 7758 }, { "epoch": 0.7973903215863557, "grad_norm": 0.05428781732916832, "learning_rate": 0.01, "loss": 2.1228, "step": 7761 }, { "epoch": 0.7976985513202507, "grad_norm": 0.04751111939549446, "learning_rate": 0.01, "loss": 2.1178, "step": 7764 }, { "epoch": 0.7980067810541457, "grad_norm": 0.08653240650892258, "learning_rate": 0.01, "loss": 2.1081, "step": 7767 }, { "epoch": 0.7983150107880407, "grad_norm": 0.04038892313838005, "learning_rate": 0.01, "loss": 2.1028, "step": 7770 }, { "epoch": 0.7986232405219357, "grad_norm": 0.05703849345445633, "learning_rate": 0.01, "loss": 2.1249, "step": 7773 }, { "epoch": 0.7989314702558307, "grad_norm": 0.06425055861473083, "learning_rate": 0.01, "loss": 2.1291, "step": 7776 }, { "epoch": 0.7992396999897257, "grad_norm": 0.05537475273013115, "learning_rate": 0.01, "loss": 2.1122, "step": 7779 }, { "epoch": 0.7995479297236207, "grad_norm": 0.05172963812947273, "learning_rate": 0.01, "loss": 2.1218, "step": 7782 }, { "epoch": 0.7998561594575156, "grad_norm": 0.05907023698091507, "learning_rate": 0.01, "loss": 2.1041, "step": 7785 }, { "epoch": 0.8001643891914106, "grad_norm": 0.10618621110916138, "learning_rate": 0.01, "loss": 2.1266, "step": 7788 }, { "epoch": 0.8004726189253056, "grad_norm": 0.06189849600195885, "learning_rate": 0.01, "loss": 2.1327, "step": 7791 }, { "epoch": 0.8007808486592006, "grad_norm": 0.10624901950359344, "learning_rate": 0.01, "loss": 2.0943, "step": 7794 }, { "epoch": 0.8010890783930956, "grad_norm": 0.04061825945973396, "learning_rate": 0.01, "loss": 2.0859, "step": 7797 }, { "epoch": 0.8013973081269906, "grad_norm": 0.04402461647987366, "learning_rate": 0.01, "loss": 2.1303, "step": 7800 }, { "epoch": 0.8017055378608856, "grad_norm": 0.05029004439711571, "learning_rate": 0.01, "loss": 2.1224, "step": 7803 }, { "epoch": 0.8020137675947806, "grad_norm": 0.055786702781915665, "learning_rate": 0.01, "loss": 2.1296, "step": 7806 }, { "epoch": 0.8023219973286756, "grad_norm": 0.11740477383136749, "learning_rate": 0.01, "loss": 2.1222, "step": 7809 }, { "epoch": 0.8026302270625706, "grad_norm": 0.10261218994855881, "learning_rate": 0.01, "loss": 2.1405, "step": 7812 }, { "epoch": 0.8029384567964656, "grad_norm": 0.05233708769083023, "learning_rate": 0.01, "loss": 2.1118, "step": 7815 }, { "epoch": 0.8032466865303606, "grad_norm": 0.04390858858823776, "learning_rate": 0.01, "loss": 2.1299, "step": 7818 }, { "epoch": 0.8035549162642556, "grad_norm": 0.05893026292324066, "learning_rate": 0.01, "loss": 2.1184, "step": 7821 }, { "epoch": 0.8038631459981507, "grad_norm": 0.06398338079452515, "learning_rate": 0.01, "loss": 2.1057, "step": 7824 }, { "epoch": 0.8041713757320457, "grad_norm": 0.07129772752523422, "learning_rate": 0.01, "loss": 2.1056, "step": 7827 }, { "epoch": 0.8044796054659407, "grad_norm": 0.07481534779071808, "learning_rate": 0.01, "loss": 2.1272, "step": 7830 }, { "epoch": 0.8047878351998357, "grad_norm": 0.049200594425201416, "learning_rate": 0.01, "loss": 2.0942, "step": 7833 }, { "epoch": 0.8050960649337306, "grad_norm": 0.05124384164810181, "learning_rate": 0.01, "loss": 2.0859, "step": 7836 }, { "epoch": 0.8054042946676256, "grad_norm": 0.07997792959213257, "learning_rate": 0.01, "loss": 2.1412, "step": 7839 }, { "epoch": 0.8057125244015206, "grad_norm": 0.12280064076185226, "learning_rate": 0.01, "loss": 2.0826, "step": 7842 }, { "epoch": 0.8060207541354156, "grad_norm": 0.05292202904820442, "learning_rate": 0.01, "loss": 2.0965, "step": 7845 }, { "epoch": 0.8063289838693106, "grad_norm": 0.04903187230229378, "learning_rate": 0.01, "loss": 2.0911, "step": 7848 }, { "epoch": 0.8066372136032056, "grad_norm": 0.06882268935441971, "learning_rate": 0.01, "loss": 2.108, "step": 7851 }, { "epoch": 0.8069454433371006, "grad_norm": 0.06937083601951599, "learning_rate": 0.01, "loss": 2.1234, "step": 7854 }, { "epoch": 0.8072536730709956, "grad_norm": 0.10075647383928299, "learning_rate": 0.01, "loss": 2.0983, "step": 7857 }, { "epoch": 0.8075619028048906, "grad_norm": 0.07185733318328857, "learning_rate": 0.01, "loss": 2.0998, "step": 7860 }, { "epoch": 0.8078701325387856, "grad_norm": 0.07266184687614441, "learning_rate": 0.01, "loss": 2.1056, "step": 7863 }, { "epoch": 0.8081783622726806, "grad_norm": 0.05049808695912361, "learning_rate": 0.01, "loss": 2.1126, "step": 7866 }, { "epoch": 0.8084865920065756, "grad_norm": 0.07260838896036148, "learning_rate": 0.01, "loss": 2.1311, "step": 7869 }, { "epoch": 0.8087948217404706, "grad_norm": 0.0659325122833252, "learning_rate": 0.01, "loss": 2.1317, "step": 7872 }, { "epoch": 0.8091030514743656, "grad_norm": 0.056960709393024445, "learning_rate": 0.01, "loss": 2.0988, "step": 7875 }, { "epoch": 0.8094112812082606, "grad_norm": 0.1266620010137558, "learning_rate": 0.01, "loss": 2.1274, "step": 7878 }, { "epoch": 0.8097195109421556, "grad_norm": 0.05951874330639839, "learning_rate": 0.01, "loss": 2.1342, "step": 7881 }, { "epoch": 0.8100277406760505, "grad_norm": 0.06081915274262428, "learning_rate": 0.01, "loss": 2.1036, "step": 7884 }, { "epoch": 0.8103359704099455, "grad_norm": 0.07136547565460205, "learning_rate": 0.01, "loss": 2.1067, "step": 7887 }, { "epoch": 0.8106442001438405, "grad_norm": 0.08835722506046295, "learning_rate": 0.01, "loss": 2.1123, "step": 7890 }, { "epoch": 0.8109524298777355, "grad_norm": 0.04469553008675575, "learning_rate": 0.01, "loss": 2.1117, "step": 7893 }, { "epoch": 0.8112606596116305, "grad_norm": 0.042171087116003036, "learning_rate": 0.01, "loss": 2.0875, "step": 7896 }, { "epoch": 0.8115688893455255, "grad_norm": 0.0847015529870987, "learning_rate": 0.01, "loss": 2.0998, "step": 7899 }, { "epoch": 0.8118771190794205, "grad_norm": 0.09157509356737137, "learning_rate": 0.01, "loss": 2.121, "step": 7902 }, { "epoch": 0.8121853488133155, "grad_norm": 0.06001126766204834, "learning_rate": 0.01, "loss": 2.1441, "step": 7905 }, { "epoch": 0.8124935785472105, "grad_norm": 0.03552449122071266, "learning_rate": 0.01, "loss": 2.1371, "step": 7908 }, { "epoch": 0.8128018082811055, "grad_norm": 0.034304428845644, "learning_rate": 0.01, "loss": 2.1066, "step": 7911 }, { "epoch": 0.8131100380150005, "grad_norm": 0.04897907376289368, "learning_rate": 0.01, "loss": 2.1054, "step": 7914 }, { "epoch": 0.8134182677488955, "grad_norm": 0.06674344837665558, "learning_rate": 0.01, "loss": 2.1156, "step": 7917 }, { "epoch": 0.8137264974827905, "grad_norm": 0.06437379866838455, "learning_rate": 0.01, "loss": 2.1189, "step": 7920 }, { "epoch": 0.8140347272166855, "grad_norm": 0.06402087956666946, "learning_rate": 0.01, "loss": 2.1111, "step": 7923 }, { "epoch": 0.8143429569505805, "grad_norm": 0.11063557863235474, "learning_rate": 0.01, "loss": 2.1131, "step": 7926 }, { "epoch": 0.8146511866844754, "grad_norm": 0.10625256597995758, "learning_rate": 0.01, "loss": 2.113, "step": 7929 }, { "epoch": 0.8149594164183704, "grad_norm": 0.0682268813252449, "learning_rate": 0.01, "loss": 2.0929, "step": 7932 }, { "epoch": 0.8152676461522654, "grad_norm": 0.08721883594989777, "learning_rate": 0.01, "loss": 2.0878, "step": 7935 }, { "epoch": 0.8155758758861605, "grad_norm": 0.07372716814279556, "learning_rate": 0.01, "loss": 2.1173, "step": 7938 }, { "epoch": 0.8158841056200555, "grad_norm": 0.049299102276563644, "learning_rate": 0.01, "loss": 2.1172, "step": 7941 }, { "epoch": 0.8161923353539505, "grad_norm": 0.06552339345216751, "learning_rate": 0.01, "loss": 2.1035, "step": 7944 }, { "epoch": 0.8165005650878455, "grad_norm": 0.08362871408462524, "learning_rate": 0.01, "loss": 2.0942, "step": 7947 }, { "epoch": 0.8168087948217405, "grad_norm": 0.07610680162906647, "learning_rate": 0.01, "loss": 2.1026, "step": 7950 }, { "epoch": 0.8171170245556355, "grad_norm": 0.058830149471759796, "learning_rate": 0.01, "loss": 2.121, "step": 7953 }, { "epoch": 0.8174252542895305, "grad_norm": 0.10281010717153549, "learning_rate": 0.01, "loss": 2.1084, "step": 7956 }, { "epoch": 0.8177334840234255, "grad_norm": 0.04509102553129196, "learning_rate": 0.01, "loss": 2.0917, "step": 7959 }, { "epoch": 0.8180417137573205, "grad_norm": 0.034059979021549225, "learning_rate": 0.01, "loss": 2.1286, "step": 7962 }, { "epoch": 0.8183499434912155, "grad_norm": 0.09370562434196472, "learning_rate": 0.01, "loss": 2.1298, "step": 7965 }, { "epoch": 0.8186581732251105, "grad_norm": 0.09386254847049713, "learning_rate": 0.01, "loss": 2.1018, "step": 7968 }, { "epoch": 0.8189664029590055, "grad_norm": 0.1801362931728363, "learning_rate": 0.01, "loss": 2.1125, "step": 7971 }, { "epoch": 0.8192746326929005, "grad_norm": 0.12590090930461884, "learning_rate": 0.01, "loss": 2.1145, "step": 7974 }, { "epoch": 0.8195828624267955, "grad_norm": 0.09913074970245361, "learning_rate": 0.01, "loss": 2.1049, "step": 7977 }, { "epoch": 0.8198910921606904, "grad_norm": 0.05249069631099701, "learning_rate": 0.01, "loss": 2.1348, "step": 7980 }, { "epoch": 0.8201993218945854, "grad_norm": 0.05334639549255371, "learning_rate": 0.01, "loss": 2.0952, "step": 7983 }, { "epoch": 0.8205075516284804, "grad_norm": 0.03963373601436615, "learning_rate": 0.01, "loss": 2.1133, "step": 7986 }, { "epoch": 0.8208157813623754, "grad_norm": 0.03334924206137657, "learning_rate": 0.01, "loss": 2.1156, "step": 7989 }, { "epoch": 0.8211240110962704, "grad_norm": 0.0628419816493988, "learning_rate": 0.01, "loss": 2.1298, "step": 7992 }, { "epoch": 0.8214322408301654, "grad_norm": 0.07143758237361908, "learning_rate": 0.01, "loss": 2.0891, "step": 7995 }, { "epoch": 0.8217404705640604, "grad_norm": 0.06662650406360626, "learning_rate": 0.01, "loss": 2.0976, "step": 7998 }, { "epoch": 0.8220487002979554, "grad_norm": 0.10575726628303528, "learning_rate": 0.01, "loss": 2.0946, "step": 8001 }, { "epoch": 0.8223569300318504, "grad_norm": 0.056455157697200775, "learning_rate": 0.01, "loss": 2.0924, "step": 8004 }, { "epoch": 0.8226651597657454, "grad_norm": 0.10326797515153885, "learning_rate": 0.01, "loss": 2.0823, "step": 8007 }, { "epoch": 0.8229733894996404, "grad_norm": 0.08464314043521881, "learning_rate": 0.01, "loss": 2.1274, "step": 8010 }, { "epoch": 0.8232816192335354, "grad_norm": 0.052144117653369904, "learning_rate": 0.01, "loss": 2.0952, "step": 8013 }, { "epoch": 0.8235898489674304, "grad_norm": 0.05464213341474533, "learning_rate": 0.01, "loss": 2.1117, "step": 8016 }, { "epoch": 0.8238980787013254, "grad_norm": 0.06700276583433151, "learning_rate": 0.01, "loss": 2.1289, "step": 8019 }, { "epoch": 0.8242063084352204, "grad_norm": 0.05322539806365967, "learning_rate": 0.01, "loss": 2.1346, "step": 8022 }, { "epoch": 0.8245145381691154, "grad_norm": 0.040953267365694046, "learning_rate": 0.01, "loss": 2.1035, "step": 8025 }, { "epoch": 0.8248227679030103, "grad_norm": 0.043644580990076065, "learning_rate": 0.01, "loss": 2.1238, "step": 8028 }, { "epoch": 0.8251309976369053, "grad_norm": 0.060951683670282364, "learning_rate": 0.01, "loss": 2.1169, "step": 8031 }, { "epoch": 0.8254392273708003, "grad_norm": 0.11269400268793106, "learning_rate": 0.01, "loss": 2.1184, "step": 8034 }, { "epoch": 0.8257474571046953, "grad_norm": 0.05554080754518509, "learning_rate": 0.01, "loss": 2.112, "step": 8037 }, { "epoch": 0.8260556868385903, "grad_norm": 0.08155755698680878, "learning_rate": 0.01, "loss": 2.096, "step": 8040 }, { "epoch": 0.8263639165724853, "grad_norm": 0.0661015510559082, "learning_rate": 0.01, "loss": 2.0943, "step": 8043 }, { "epoch": 0.8266721463063803, "grad_norm": 0.06186169385910034, "learning_rate": 0.01, "loss": 2.1225, "step": 8046 }, { "epoch": 0.8269803760402753, "grad_norm": 0.06658541411161423, "learning_rate": 0.01, "loss": 2.1189, "step": 8049 }, { "epoch": 0.8272886057741703, "grad_norm": 0.09120085090398788, "learning_rate": 0.01, "loss": 2.1181, "step": 8052 }, { "epoch": 0.8275968355080654, "grad_norm": 0.059662993997335434, "learning_rate": 0.01, "loss": 2.1257, "step": 8055 }, { "epoch": 0.8279050652419604, "grad_norm": 0.08305416256189346, "learning_rate": 0.01, "loss": 2.1108, "step": 8058 }, { "epoch": 0.8282132949758554, "grad_norm": 0.047110967338085175, "learning_rate": 0.01, "loss": 2.0786, "step": 8061 }, { "epoch": 0.8285215247097504, "grad_norm": 0.042120445519685745, "learning_rate": 0.01, "loss": 2.1081, "step": 8064 }, { "epoch": 0.8288297544436454, "grad_norm": 0.04596862941980362, "learning_rate": 0.01, "loss": 2.1025, "step": 8067 }, { "epoch": 0.8291379841775404, "grad_norm": 0.055216096341609955, "learning_rate": 0.01, "loss": 2.1416, "step": 8070 }, { "epoch": 0.8294462139114354, "grad_norm": 0.05959683656692505, "learning_rate": 0.01, "loss": 2.0832, "step": 8073 }, { "epoch": 0.8297544436453304, "grad_norm": 0.045481909066438675, "learning_rate": 0.01, "loss": 2.1391, "step": 8076 }, { "epoch": 0.8300626733792253, "grad_norm": 0.049282100051641464, "learning_rate": 0.01, "loss": 2.1227, "step": 8079 }, { "epoch": 0.8303709031131203, "grad_norm": 0.058084890246391296, "learning_rate": 0.01, "loss": 2.1211, "step": 8082 }, { "epoch": 0.8306791328470153, "grad_norm": 0.11113768070936203, "learning_rate": 0.01, "loss": 2.132, "step": 8085 }, { "epoch": 0.8309873625809103, "grad_norm": 0.07015852630138397, "learning_rate": 0.01, "loss": 2.0988, "step": 8088 }, { "epoch": 0.8312955923148053, "grad_norm": 0.09509722143411636, "learning_rate": 0.01, "loss": 2.1064, "step": 8091 }, { "epoch": 0.8316038220487003, "grad_norm": 0.03616593778133392, "learning_rate": 0.01, "loss": 2.1078, "step": 8094 }, { "epoch": 0.8319120517825953, "grad_norm": 0.0486396960914135, "learning_rate": 0.01, "loss": 2.1279, "step": 8097 }, { "epoch": 0.8322202815164903, "grad_norm": 0.050647489726543427, "learning_rate": 0.01, "loss": 2.0808, "step": 8100 }, { "epoch": 0.8325285112503853, "grad_norm": 0.08125802874565125, "learning_rate": 0.01, "loss": 2.1099, "step": 8103 }, { "epoch": 0.8328367409842803, "grad_norm": 0.08078313618898392, "learning_rate": 0.01, "loss": 2.1162, "step": 8106 }, { "epoch": 0.8331449707181753, "grad_norm": 0.06157573312520981, "learning_rate": 0.01, "loss": 2.1288, "step": 8109 }, { "epoch": 0.8334532004520703, "grad_norm": 0.057771824300289154, "learning_rate": 0.01, "loss": 2.1136, "step": 8112 }, { "epoch": 0.8337614301859653, "grad_norm": 0.06634260714054108, "learning_rate": 0.01, "loss": 2.1029, "step": 8115 }, { "epoch": 0.8340696599198603, "grad_norm": 0.12186034023761749, "learning_rate": 0.01, "loss": 2.1236, "step": 8118 }, { "epoch": 0.8343778896537553, "grad_norm": 0.03940106928348541, "learning_rate": 0.01, "loss": 2.1169, "step": 8121 }, { "epoch": 0.8346861193876502, "grad_norm": 0.06003478914499283, "learning_rate": 0.01, "loss": 2.1108, "step": 8124 }, { "epoch": 0.8349943491215452, "grad_norm": 0.04963524639606476, "learning_rate": 0.01, "loss": 2.0893, "step": 8127 }, { "epoch": 0.8353025788554402, "grad_norm": 0.04543556645512581, "learning_rate": 0.01, "loss": 2.1062, "step": 8130 }, { "epoch": 0.8356108085893352, "grad_norm": 0.05210501328110695, "learning_rate": 0.01, "loss": 2.1065, "step": 8133 }, { "epoch": 0.8359190383232302, "grad_norm": 0.10866094380617142, "learning_rate": 0.01, "loss": 2.1154, "step": 8136 }, { "epoch": 0.8362272680571252, "grad_norm": 0.07595928758382797, "learning_rate": 0.01, "loss": 2.1079, "step": 8139 }, { "epoch": 0.8365354977910202, "grad_norm": 0.04948664829134941, "learning_rate": 0.01, "loss": 2.0955, "step": 8142 }, { "epoch": 0.8368437275249152, "grad_norm": 0.11719872057437897, "learning_rate": 0.01, "loss": 2.1106, "step": 8145 }, { "epoch": 0.8371519572588102, "grad_norm": 0.04469067603349686, "learning_rate": 0.01, "loss": 2.1145, "step": 8148 }, { "epoch": 0.8374601869927052, "grad_norm": 0.038385387510061264, "learning_rate": 0.01, "loss": 2.0933, "step": 8151 }, { "epoch": 0.8377684167266002, "grad_norm": 0.04040665924549103, "learning_rate": 0.01, "loss": 2.1119, "step": 8154 }, { "epoch": 0.8380766464604952, "grad_norm": 0.042900413274765015, "learning_rate": 0.01, "loss": 2.1053, "step": 8157 }, { "epoch": 0.8383848761943902, "grad_norm": 0.06709393113851547, "learning_rate": 0.01, "loss": 2.0858, "step": 8160 }, { "epoch": 0.8386931059282852, "grad_norm": 0.08388926833868027, "learning_rate": 0.01, "loss": 2.1287, "step": 8163 }, { "epoch": 0.8390013356621802, "grad_norm": 0.0701015368103981, "learning_rate": 0.01, "loss": 2.0965, "step": 8166 }, { "epoch": 0.8393095653960753, "grad_norm": 0.0841115415096283, "learning_rate": 0.01, "loss": 2.1136, "step": 8169 }, { "epoch": 0.8396177951299703, "grad_norm": 0.08542285114526749, "learning_rate": 0.01, "loss": 2.1166, "step": 8172 }, { "epoch": 0.8399260248638653, "grad_norm": 0.046626705676317215, "learning_rate": 0.01, "loss": 2.1318, "step": 8175 }, { "epoch": 0.8402342545977602, "grad_norm": 0.08752947300672531, "learning_rate": 0.01, "loss": 2.0873, "step": 8178 }, { "epoch": 0.8405424843316552, "grad_norm": 0.04728331416845322, "learning_rate": 0.01, "loss": 2.0951, "step": 8181 }, { "epoch": 0.8408507140655502, "grad_norm": 0.04881293699145317, "learning_rate": 0.01, "loss": 2.1011, "step": 8184 }, { "epoch": 0.8411589437994452, "grad_norm": 0.049758728593587875, "learning_rate": 0.01, "loss": 2.1206, "step": 8187 }, { "epoch": 0.8414671735333402, "grad_norm": 0.037589117884635925, "learning_rate": 0.01, "loss": 2.085, "step": 8190 }, { "epoch": 0.8417754032672352, "grad_norm": 0.11785265803337097, "learning_rate": 0.01, "loss": 2.1195, "step": 8193 }, { "epoch": 0.8420836330011302, "grad_norm": 0.09815037995576859, "learning_rate": 0.01, "loss": 2.065, "step": 8196 }, { "epoch": 0.8423918627350252, "grad_norm": 0.07950727641582489, "learning_rate": 0.01, "loss": 2.1081, "step": 8199 }, { "epoch": 0.8427000924689202, "grad_norm": 0.04057254642248154, "learning_rate": 0.01, "loss": 2.0882, "step": 8202 }, { "epoch": 0.8430083222028152, "grad_norm": 0.07260222733020782, "learning_rate": 0.01, "loss": 2.1018, "step": 8205 }, { "epoch": 0.8433165519367102, "grad_norm": 0.057693734765052795, "learning_rate": 0.01, "loss": 2.112, "step": 8208 }, { "epoch": 0.8436247816706052, "grad_norm": 0.039680637419223785, "learning_rate": 0.01, "loss": 2.0974, "step": 8211 }, { "epoch": 0.8439330114045002, "grad_norm": 0.07584577798843384, "learning_rate": 0.01, "loss": 2.129, "step": 8214 }, { "epoch": 0.8442412411383952, "grad_norm": 0.044016819447278976, "learning_rate": 0.01, "loss": 2.1139, "step": 8217 }, { "epoch": 0.8445494708722902, "grad_norm": 0.04449582099914551, "learning_rate": 0.01, "loss": 2.1085, "step": 8220 }, { "epoch": 0.8448577006061851, "grad_norm": 0.044676005840301514, "learning_rate": 0.01, "loss": 2.1081, "step": 8223 }, { "epoch": 0.8451659303400801, "grad_norm": 0.04926025867462158, "learning_rate": 0.01, "loss": 2.1187, "step": 8226 }, { "epoch": 0.8454741600739751, "grad_norm": 0.10964366793632507, "learning_rate": 0.01, "loss": 2.0898, "step": 8229 }, { "epoch": 0.8457823898078701, "grad_norm": 0.09405852109193802, "learning_rate": 0.01, "loss": 2.1117, "step": 8232 }, { "epoch": 0.8460906195417651, "grad_norm": 0.09241268038749695, "learning_rate": 0.01, "loss": 2.1225, "step": 8235 }, { "epoch": 0.8463988492756601, "grad_norm": 0.05887102335691452, "learning_rate": 0.01, "loss": 2.0944, "step": 8238 }, { "epoch": 0.8467070790095551, "grad_norm": 0.13513131439685822, "learning_rate": 0.01, "loss": 2.1217, "step": 8241 }, { "epoch": 0.8470153087434501, "grad_norm": 0.06370443850755692, "learning_rate": 0.01, "loss": 2.121, "step": 8244 }, { "epoch": 0.8473235384773451, "grad_norm": 0.0426030196249485, "learning_rate": 0.01, "loss": 2.0937, "step": 8247 }, { "epoch": 0.8476317682112401, "grad_norm": 0.049412764608860016, "learning_rate": 0.01, "loss": 2.108, "step": 8250 }, { "epoch": 0.8479399979451351, "grad_norm": 0.061407607048749924, "learning_rate": 0.01, "loss": 2.1009, "step": 8253 }, { "epoch": 0.8482482276790301, "grad_norm": 0.12416908144950867, "learning_rate": 0.01, "loss": 2.0819, "step": 8256 }, { "epoch": 0.8485564574129251, "grad_norm": 0.05728744715452194, "learning_rate": 0.01, "loss": 2.1132, "step": 8259 }, { "epoch": 0.8488646871468201, "grad_norm": 0.06724981963634491, "learning_rate": 0.01, "loss": 2.0932, "step": 8262 }, { "epoch": 0.8491729168807151, "grad_norm": 0.055260930210351944, "learning_rate": 0.01, "loss": 2.0933, "step": 8265 }, { "epoch": 0.84948114661461, "grad_norm": 0.04230106249451637, "learning_rate": 0.01, "loss": 2.1135, "step": 8268 }, { "epoch": 0.849789376348505, "grad_norm": 0.04593104496598244, "learning_rate": 0.01, "loss": 2.1091, "step": 8271 }, { "epoch": 0.8500976060824, "grad_norm": 0.09625285863876343, "learning_rate": 0.01, "loss": 2.0946, "step": 8274 }, { "epoch": 0.850405835816295, "grad_norm": 0.04556501284241676, "learning_rate": 0.01, "loss": 2.0975, "step": 8277 }, { "epoch": 0.85071406555019, "grad_norm": 0.09413543343544006, "learning_rate": 0.01, "loss": 2.0863, "step": 8280 }, { "epoch": 0.851022295284085, "grad_norm": 0.08400101214647293, "learning_rate": 0.01, "loss": 2.1045, "step": 8283 }, { "epoch": 0.8513305250179801, "grad_norm": 0.06278138607740402, "learning_rate": 0.01, "loss": 2.116, "step": 8286 }, { "epoch": 0.8516387547518751, "grad_norm": 0.04442959651350975, "learning_rate": 0.01, "loss": 2.0796, "step": 8289 }, { "epoch": 0.8519469844857701, "grad_norm": 0.045807912945747375, "learning_rate": 0.01, "loss": 2.0823, "step": 8292 }, { "epoch": 0.8522552142196651, "grad_norm": 0.0426551029086113, "learning_rate": 0.01, "loss": 2.1109, "step": 8295 }, { "epoch": 0.8525634439535601, "grad_norm": 0.12200357019901276, "learning_rate": 0.01, "loss": 2.1146, "step": 8298 }, { "epoch": 0.8528716736874551, "grad_norm": 0.04152747616171837, "learning_rate": 0.01, "loss": 2.1204, "step": 8301 }, { "epoch": 0.8531799034213501, "grad_norm": 0.08464021235704422, "learning_rate": 0.01, "loss": 2.085, "step": 8304 }, { "epoch": 0.8534881331552451, "grad_norm": 0.050391390919685364, "learning_rate": 0.01, "loss": 2.0957, "step": 8307 }, { "epoch": 0.8537963628891401, "grad_norm": 0.08581732958555222, "learning_rate": 0.01, "loss": 2.1015, "step": 8310 }, { "epoch": 0.8541045926230351, "grad_norm": 0.10992308706045151, "learning_rate": 0.01, "loss": 2.0939, "step": 8313 }, { "epoch": 0.8544128223569301, "grad_norm": 0.053225912153720856, "learning_rate": 0.01, "loss": 2.1106, "step": 8316 }, { "epoch": 0.854721052090825, "grad_norm": 0.06759096682071686, "learning_rate": 0.01, "loss": 2.1035, "step": 8319 }, { "epoch": 0.85502928182472, "grad_norm": 0.058069922029972076, "learning_rate": 0.01, "loss": 2.0833, "step": 8322 }, { "epoch": 0.855337511558615, "grad_norm": 0.0657680481672287, "learning_rate": 0.01, "loss": 2.0918, "step": 8325 }, { "epoch": 0.85564574129251, "grad_norm": 0.1428556591272354, "learning_rate": 0.01, "loss": 2.0895, "step": 8328 }, { "epoch": 0.855953971026405, "grad_norm": 0.10311869531869888, "learning_rate": 0.01, "loss": 2.1041, "step": 8331 }, { "epoch": 0.8562622007603, "grad_norm": 0.12024179100990295, "learning_rate": 0.01, "loss": 2.1179, "step": 8334 }, { "epoch": 0.856570430494195, "grad_norm": 0.08294446766376495, "learning_rate": 0.01, "loss": 2.1042, "step": 8337 }, { "epoch": 0.85687866022809, "grad_norm": 0.05203935503959656, "learning_rate": 0.01, "loss": 2.1275, "step": 8340 }, { "epoch": 0.857186889961985, "grad_norm": 0.061564356088638306, "learning_rate": 0.01, "loss": 2.0951, "step": 8343 }, { "epoch": 0.85749511969588, "grad_norm": 0.043616339564323425, "learning_rate": 0.01, "loss": 2.1082, "step": 8346 }, { "epoch": 0.857803349429775, "grad_norm": 0.0885004997253418, "learning_rate": 0.01, "loss": 2.1071, "step": 8349 }, { "epoch": 0.85811157916367, "grad_norm": 0.06275481730699539, "learning_rate": 0.01, "loss": 2.137, "step": 8352 }, { "epoch": 0.858419808897565, "grad_norm": 0.054776523262262344, "learning_rate": 0.01, "loss": 2.1117, "step": 8355 }, { "epoch": 0.85872803863146, "grad_norm": 0.07782801240682602, "learning_rate": 0.01, "loss": 2.0822, "step": 8358 }, { "epoch": 0.859036268365355, "grad_norm": 0.12301263958215714, "learning_rate": 0.01, "loss": 2.1126, "step": 8361 }, { "epoch": 0.85934449809925, "grad_norm": 0.07181745767593384, "learning_rate": 0.01, "loss": 2.1359, "step": 8364 }, { "epoch": 0.859652727833145, "grad_norm": 0.07232604175806046, "learning_rate": 0.01, "loss": 2.0849, "step": 8367 }, { "epoch": 0.8599609575670399, "grad_norm": 0.06810937821865082, "learning_rate": 0.01, "loss": 2.1088, "step": 8370 }, { "epoch": 0.8602691873009349, "grad_norm": 0.048163384199142456, "learning_rate": 0.01, "loss": 2.1173, "step": 8373 }, { "epoch": 0.8605774170348299, "grad_norm": 0.05688156560063362, "learning_rate": 0.01, "loss": 2.114, "step": 8376 }, { "epoch": 0.8608856467687249, "grad_norm": 0.065540611743927, "learning_rate": 0.01, "loss": 2.0989, "step": 8379 }, { "epoch": 0.8611938765026199, "grad_norm": 0.09561596065759659, "learning_rate": 0.01, "loss": 2.0894, "step": 8382 }, { "epoch": 0.8615021062365149, "grad_norm": 0.06719313561916351, "learning_rate": 0.01, "loss": 2.0928, "step": 8385 }, { "epoch": 0.8618103359704099, "grad_norm": 0.05895761027932167, "learning_rate": 0.01, "loss": 2.1037, "step": 8388 }, { "epoch": 0.8621185657043049, "grad_norm": 0.09232669323682785, "learning_rate": 0.01, "loss": 2.1272, "step": 8391 }, { "epoch": 0.8624267954381999, "grad_norm": 0.06715840846300125, "learning_rate": 0.01, "loss": 2.072, "step": 8394 }, { "epoch": 0.8627350251720949, "grad_norm": 0.04794420674443245, "learning_rate": 0.01, "loss": 2.1087, "step": 8397 }, { "epoch": 0.8630432549059899, "grad_norm": 0.037383124232292175, "learning_rate": 0.01, "loss": 2.0761, "step": 8400 }, { "epoch": 0.863351484639885, "grad_norm": 0.05601905286312103, "learning_rate": 0.01, "loss": 2.0926, "step": 8403 }, { "epoch": 0.86365971437378, "grad_norm": 0.0839313194155693, "learning_rate": 0.01, "loss": 2.0887, "step": 8406 }, { "epoch": 0.863967944107675, "grad_norm": 0.07600929588079453, "learning_rate": 0.01, "loss": 2.1143, "step": 8409 }, { "epoch": 0.86427617384157, "grad_norm": 0.06851659715175629, "learning_rate": 0.01, "loss": 2.0921, "step": 8412 }, { "epoch": 0.864584403575465, "grad_norm": 0.05021858587861061, "learning_rate": 0.01, "loss": 2.0903, "step": 8415 }, { "epoch": 0.86489263330936, "grad_norm": 0.04881426692008972, "learning_rate": 0.01, "loss": 2.1047, "step": 8418 }, { "epoch": 0.8652008630432549, "grad_norm": 0.04262546822428703, "learning_rate": 0.01, "loss": 2.0852, "step": 8421 }, { "epoch": 0.8655090927771499, "grad_norm": 0.050467535853385925, "learning_rate": 0.01, "loss": 2.1045, "step": 8424 }, { "epoch": 0.8658173225110449, "grad_norm": 0.0725008100271225, "learning_rate": 0.01, "loss": 2.077, "step": 8427 }, { "epoch": 0.8661255522449399, "grad_norm": 0.07234456390142441, "learning_rate": 0.01, "loss": 2.13, "step": 8430 }, { "epoch": 0.8664337819788349, "grad_norm": 0.060751501470804214, "learning_rate": 0.01, "loss": 2.0948, "step": 8433 }, { "epoch": 0.8667420117127299, "grad_norm": 0.058911584317684174, "learning_rate": 0.01, "loss": 2.0908, "step": 8436 }, { "epoch": 0.8670502414466249, "grad_norm": 0.08380532264709473, "learning_rate": 0.01, "loss": 2.1438, "step": 8439 }, { "epoch": 0.8673584711805199, "grad_norm": 0.058240536600351334, "learning_rate": 0.01, "loss": 2.1384, "step": 8442 }, { "epoch": 0.8676667009144149, "grad_norm": 0.0422792062163353, "learning_rate": 0.01, "loss": 2.0926, "step": 8445 }, { "epoch": 0.8679749306483099, "grad_norm": 0.07096652686595917, "learning_rate": 0.01, "loss": 2.1195, "step": 8448 }, { "epoch": 0.8682831603822049, "grad_norm": 0.13370642066001892, "learning_rate": 0.01, "loss": 2.1367, "step": 8451 }, { "epoch": 0.8685913901160999, "grad_norm": 0.0597628615796566, "learning_rate": 0.01, "loss": 2.087, "step": 8454 }, { "epoch": 0.8688996198499949, "grad_norm": 0.039561979472637177, "learning_rate": 0.01, "loss": 2.112, "step": 8457 }, { "epoch": 0.8692078495838899, "grad_norm": 0.04080485925078392, "learning_rate": 0.01, "loss": 2.1024, "step": 8460 }, { "epoch": 0.8695160793177849, "grad_norm": 0.05293022468686104, "learning_rate": 0.01, "loss": 2.0731, "step": 8463 }, { "epoch": 0.8698243090516798, "grad_norm": 0.06960830092430115, "learning_rate": 0.01, "loss": 2.1255, "step": 8466 }, { "epoch": 0.8701325387855748, "grad_norm": 0.09768849611282349, "learning_rate": 0.01, "loss": 2.1217, "step": 8469 }, { "epoch": 0.8704407685194698, "grad_norm": 0.11970885097980499, "learning_rate": 0.01, "loss": 2.0932, "step": 8472 }, { "epoch": 0.8707489982533648, "grad_norm": 0.12014521658420563, "learning_rate": 0.01, "loss": 2.1009, "step": 8475 }, { "epoch": 0.8710572279872598, "grad_norm": 0.04288540408015251, "learning_rate": 0.01, "loss": 2.1111, "step": 8478 }, { "epoch": 0.8713654577211548, "grad_norm": 0.033004507422447205, "learning_rate": 0.01, "loss": 2.1029, "step": 8481 }, { "epoch": 0.8716736874550498, "grad_norm": 0.03685779869556427, "learning_rate": 0.01, "loss": 2.1077, "step": 8484 }, { "epoch": 0.8719819171889448, "grad_norm": 0.06450948119163513, "learning_rate": 0.01, "loss": 2.102, "step": 8487 }, { "epoch": 0.8722901469228398, "grad_norm": 0.04806706681847572, "learning_rate": 0.01, "loss": 2.1056, "step": 8490 }, { "epoch": 0.8725983766567348, "grad_norm": 0.05847964435815811, "learning_rate": 0.01, "loss": 2.095, "step": 8493 }, { "epoch": 0.8729066063906298, "grad_norm": 0.11569567024707794, "learning_rate": 0.01, "loss": 2.1058, "step": 8496 }, { "epoch": 0.8732148361245248, "grad_norm": 0.04440119490027428, "learning_rate": 0.01, "loss": 2.1127, "step": 8499 }, { "epoch": 0.8735230658584198, "grad_norm": 0.13856938481330872, "learning_rate": 0.01, "loss": 2.1072, "step": 8502 }, { "epoch": 0.8738312955923148, "grad_norm": 0.06448937207460403, "learning_rate": 0.01, "loss": 2.0813, "step": 8505 }, { "epoch": 0.8741395253262098, "grad_norm": 0.05872811749577522, "learning_rate": 0.01, "loss": 2.1227, "step": 8508 }, { "epoch": 0.8744477550601047, "grad_norm": 0.06387540698051453, "learning_rate": 0.01, "loss": 2.099, "step": 8511 }, { "epoch": 0.8747559847939997, "grad_norm": 0.044399481266736984, "learning_rate": 0.01, "loss": 2.0989, "step": 8514 }, { "epoch": 0.8750642145278948, "grad_norm": 0.118850938975811, "learning_rate": 0.01, "loss": 2.1261, "step": 8517 }, { "epoch": 0.8753724442617898, "grad_norm": 0.05479248985648155, "learning_rate": 0.01, "loss": 2.0701, "step": 8520 }, { "epoch": 0.8756806739956848, "grad_norm": 0.06442543119192123, "learning_rate": 0.01, "loss": 2.0844, "step": 8523 }, { "epoch": 0.8759889037295798, "grad_norm": 0.054294027388095856, "learning_rate": 0.01, "loss": 2.1051, "step": 8526 }, { "epoch": 0.8762971334634748, "grad_norm": 0.04776893928647041, "learning_rate": 0.01, "loss": 2.1056, "step": 8529 }, { "epoch": 0.8766053631973698, "grad_norm": 0.06740310043096542, "learning_rate": 0.01, "loss": 2.0956, "step": 8532 }, { "epoch": 0.8769135929312648, "grad_norm": 0.048034511506557465, "learning_rate": 0.01, "loss": 2.1223, "step": 8535 }, { "epoch": 0.8772218226651598, "grad_norm": 0.05819391459226608, "learning_rate": 0.01, "loss": 2.1133, "step": 8538 }, { "epoch": 0.8775300523990548, "grad_norm": 0.06093437224626541, "learning_rate": 0.01, "loss": 2.0889, "step": 8541 }, { "epoch": 0.8778382821329498, "grad_norm": 0.04628787562251091, "learning_rate": 0.01, "loss": 2.1202, "step": 8544 }, { "epoch": 0.8781465118668448, "grad_norm": 0.0903085321187973, "learning_rate": 0.01, "loss": 2.0495, "step": 8547 }, { "epoch": 0.8784547416007398, "grad_norm": 0.06924945116043091, "learning_rate": 0.01, "loss": 2.1004, "step": 8550 }, { "epoch": 0.8787629713346348, "grad_norm": 0.04104374721646309, "learning_rate": 0.01, "loss": 2.0954, "step": 8553 }, { "epoch": 0.8790712010685298, "grad_norm": 0.11671441793441772, "learning_rate": 0.01, "loss": 2.1027, "step": 8556 }, { "epoch": 0.8793794308024248, "grad_norm": 0.10247964411973953, "learning_rate": 0.01, "loss": 2.0861, "step": 8559 }, { "epoch": 0.8796876605363197, "grad_norm": 0.03979288041591644, "learning_rate": 0.01, "loss": 2.1307, "step": 8562 }, { "epoch": 0.8799958902702147, "grad_norm": 0.0406351312994957, "learning_rate": 0.01, "loss": 2.0868, "step": 8565 }, { "epoch": 0.8803041200041097, "grad_norm": 0.04127006232738495, "learning_rate": 0.01, "loss": 2.0899, "step": 8568 }, { "epoch": 0.8806123497380047, "grad_norm": 0.04559047520160675, "learning_rate": 0.01, "loss": 2.1071, "step": 8571 }, { "epoch": 0.8809205794718997, "grad_norm": 0.12507610023021698, "learning_rate": 0.01, "loss": 2.0944, "step": 8574 }, { "epoch": 0.8812288092057947, "grad_norm": 0.042683400213718414, "learning_rate": 0.01, "loss": 2.078, "step": 8577 }, { "epoch": 0.8815370389396897, "grad_norm": 0.04022818058729172, "learning_rate": 0.01, "loss": 2.0797, "step": 8580 }, { "epoch": 0.8818452686735847, "grad_norm": 0.0382862351834774, "learning_rate": 0.01, "loss": 2.0859, "step": 8583 }, { "epoch": 0.8821534984074797, "grad_norm": 0.05260771885514259, "learning_rate": 0.01, "loss": 2.0832, "step": 8586 }, { "epoch": 0.8824617281413747, "grad_norm": 0.05381648615002632, "learning_rate": 0.01, "loss": 2.1211, "step": 8589 }, { "epoch": 0.8827699578752697, "grad_norm": 0.055818814784288406, "learning_rate": 0.01, "loss": 2.1108, "step": 8592 }, { "epoch": 0.8830781876091647, "grad_norm": 0.16680215299129486, "learning_rate": 0.01, "loss": 2.0961, "step": 8595 }, { "epoch": 0.8833864173430597, "grad_norm": 0.10034742951393127, "learning_rate": 0.01, "loss": 2.1187, "step": 8598 }, { "epoch": 0.8836946470769547, "grad_norm": 0.0827341303229332, "learning_rate": 0.01, "loss": 2.1112, "step": 8601 }, { "epoch": 0.8840028768108497, "grad_norm": 0.07657956331968307, "learning_rate": 0.01, "loss": 2.0711, "step": 8604 }, { "epoch": 0.8843111065447447, "grad_norm": 0.036220960319042206, "learning_rate": 0.01, "loss": 2.1097, "step": 8607 }, { "epoch": 0.8846193362786396, "grad_norm": 0.04672658443450928, "learning_rate": 0.01, "loss": 2.1099, "step": 8610 }, { "epoch": 0.8849275660125346, "grad_norm": 0.04827800393104553, "learning_rate": 0.01, "loss": 2.1081, "step": 8613 }, { "epoch": 0.8852357957464296, "grad_norm": 0.04962724447250366, "learning_rate": 0.01, "loss": 2.0895, "step": 8616 }, { "epoch": 0.8855440254803246, "grad_norm": 0.03474809601902962, "learning_rate": 0.01, "loss": 2.0942, "step": 8619 }, { "epoch": 0.8858522552142196, "grad_norm": 0.07395246624946594, "learning_rate": 0.01, "loss": 2.1145, "step": 8622 }, { "epoch": 0.8861604849481146, "grad_norm": 0.09853484481573105, "learning_rate": 0.01, "loss": 2.0991, "step": 8625 }, { "epoch": 0.8864687146820096, "grad_norm": 0.11892013251781464, "learning_rate": 0.01, "loss": 2.0968, "step": 8628 }, { "epoch": 0.8867769444159046, "grad_norm": 0.12780621647834778, "learning_rate": 0.01, "loss": 2.1154, "step": 8631 }, { "epoch": 0.8870851741497997, "grad_norm": 0.04470033943653107, "learning_rate": 0.01, "loss": 2.1027, "step": 8634 }, { "epoch": 0.8873934038836947, "grad_norm": 0.054323747754096985, "learning_rate": 0.01, "loss": 2.0952, "step": 8637 }, { "epoch": 0.8877016336175897, "grad_norm": 0.08175788819789886, "learning_rate": 0.01, "loss": 2.0882, "step": 8640 }, { "epoch": 0.8880098633514847, "grad_norm": 0.07456079125404358, "learning_rate": 0.01, "loss": 2.141, "step": 8643 }, { "epoch": 0.8883180930853797, "grad_norm": 0.055910736322402954, "learning_rate": 0.01, "loss": 2.1102, "step": 8646 }, { "epoch": 0.8886263228192747, "grad_norm": 0.05231192335486412, "learning_rate": 0.01, "loss": 2.1026, "step": 8649 }, { "epoch": 0.8889345525531697, "grad_norm": 0.05306578800082207, "learning_rate": 0.01, "loss": 2.1051, "step": 8652 }, { "epoch": 0.8892427822870647, "grad_norm": 0.05569072067737579, "learning_rate": 0.01, "loss": 2.0835, "step": 8655 }, { "epoch": 0.8895510120209597, "grad_norm": 0.050971515476703644, "learning_rate": 0.01, "loss": 2.0718, "step": 8658 }, { "epoch": 0.8898592417548546, "grad_norm": 0.061436936259269714, "learning_rate": 0.01, "loss": 2.1167, "step": 8661 }, { "epoch": 0.8901674714887496, "grad_norm": 0.04307536780834198, "learning_rate": 0.01, "loss": 2.0972, "step": 8664 }, { "epoch": 0.8904757012226446, "grad_norm": 0.1459832638502121, "learning_rate": 0.01, "loss": 2.1306, "step": 8667 }, { "epoch": 0.8907839309565396, "grad_norm": 0.05527958646416664, "learning_rate": 0.01, "loss": 2.0974, "step": 8670 }, { "epoch": 0.8910921606904346, "grad_norm": 0.1319393813610077, "learning_rate": 0.01, "loss": 2.1259, "step": 8673 }, { "epoch": 0.8914003904243296, "grad_norm": 0.06124665215611458, "learning_rate": 0.01, "loss": 2.0997, "step": 8676 }, { "epoch": 0.8917086201582246, "grad_norm": 0.08667455613613129, "learning_rate": 0.01, "loss": 2.0941, "step": 8679 }, { "epoch": 0.8920168498921196, "grad_norm": 0.06631213426589966, "learning_rate": 0.01, "loss": 2.1196, "step": 8682 }, { "epoch": 0.8923250796260146, "grad_norm": 0.060188647359609604, "learning_rate": 0.01, "loss": 2.0971, "step": 8685 }, { "epoch": 0.8926333093599096, "grad_norm": 0.039312943816185, "learning_rate": 0.01, "loss": 2.1119, "step": 8688 }, { "epoch": 0.8929415390938046, "grad_norm": 0.03959662839770317, "learning_rate": 0.01, "loss": 2.0897, "step": 8691 }, { "epoch": 0.8932497688276996, "grad_norm": 0.09711046516895294, "learning_rate": 0.01, "loss": 2.1133, "step": 8694 }, { "epoch": 0.8935579985615946, "grad_norm": 0.07965920865535736, "learning_rate": 0.01, "loss": 2.0635, "step": 8697 }, { "epoch": 0.8938662282954896, "grad_norm": 0.08770687133073807, "learning_rate": 0.01, "loss": 2.0885, "step": 8700 }, { "epoch": 0.8941744580293846, "grad_norm": 0.04591045528650284, "learning_rate": 0.01, "loss": 2.0926, "step": 8703 }, { "epoch": 0.8944826877632795, "grad_norm": 0.09602218866348267, "learning_rate": 0.01, "loss": 2.0856, "step": 8706 }, { "epoch": 0.8947909174971745, "grad_norm": 0.09482742100954056, "learning_rate": 0.01, "loss": 2.0966, "step": 8709 }, { "epoch": 0.8950991472310695, "grad_norm": 0.03937089815735817, "learning_rate": 0.01, "loss": 2.1043, "step": 8712 }, { "epoch": 0.8954073769649645, "grad_norm": 0.056832704693078995, "learning_rate": 0.01, "loss": 2.1165, "step": 8715 }, { "epoch": 0.8957156066988595, "grad_norm": 0.06370353698730469, "learning_rate": 0.01, "loss": 2.1144, "step": 8718 }, { "epoch": 0.8960238364327545, "grad_norm": 0.06752549856901169, "learning_rate": 0.01, "loss": 2.1026, "step": 8721 }, { "epoch": 0.8963320661666495, "grad_norm": 0.13301892578601837, "learning_rate": 0.01, "loss": 2.11, "step": 8724 }, { "epoch": 0.8966402959005445, "grad_norm": 0.05210836976766586, "learning_rate": 0.01, "loss": 2.0925, "step": 8727 }, { "epoch": 0.8969485256344395, "grad_norm": 0.03570270165801048, "learning_rate": 0.01, "loss": 2.0809, "step": 8730 }, { "epoch": 0.8972567553683345, "grad_norm": 0.05898820236325264, "learning_rate": 0.01, "loss": 2.0786, "step": 8733 }, { "epoch": 0.8975649851022295, "grad_norm": 0.05087563395500183, "learning_rate": 0.01, "loss": 2.1071, "step": 8736 }, { "epoch": 0.8978732148361245, "grad_norm": 0.09473355114459991, "learning_rate": 0.01, "loss": 2.103, "step": 8739 }, { "epoch": 0.8981814445700195, "grad_norm": 0.09793075919151306, "learning_rate": 0.01, "loss": 2.0972, "step": 8742 }, { "epoch": 0.8984896743039145, "grad_norm": 0.05115204304456711, "learning_rate": 0.01, "loss": 2.0979, "step": 8745 }, { "epoch": 0.8987979040378095, "grad_norm": 0.057413987815380096, "learning_rate": 0.01, "loss": 2.1156, "step": 8748 }, { "epoch": 0.8991061337717046, "grad_norm": 0.04136224836111069, "learning_rate": 0.01, "loss": 2.1269, "step": 8751 }, { "epoch": 0.8994143635055996, "grad_norm": 0.06866753846406937, "learning_rate": 0.01, "loss": 2.1092, "step": 8754 }, { "epoch": 0.8997225932394946, "grad_norm": 0.0757627934217453, "learning_rate": 0.01, "loss": 2.0933, "step": 8757 }, { "epoch": 0.9000308229733895, "grad_norm": 0.08082983642816544, "learning_rate": 0.01, "loss": 2.1124, "step": 8760 }, { "epoch": 0.9003390527072845, "grad_norm": 0.046828944236040115, "learning_rate": 0.01, "loss": 2.0978, "step": 8763 }, { "epoch": 0.9006472824411795, "grad_norm": 0.11039458215236664, "learning_rate": 0.01, "loss": 2.0989, "step": 8766 }, { "epoch": 0.9009555121750745, "grad_norm": 0.048537638038396835, "learning_rate": 0.01, "loss": 2.0946, "step": 8769 }, { "epoch": 0.9012637419089695, "grad_norm": 0.06700310111045837, "learning_rate": 0.01, "loss": 2.1184, "step": 8772 }, { "epoch": 0.9015719716428645, "grad_norm": 0.044369909912347794, "learning_rate": 0.01, "loss": 2.1026, "step": 8775 }, { "epoch": 0.9018802013767595, "grad_norm": 0.041071876883506775, "learning_rate": 0.01, "loss": 2.0774, "step": 8778 }, { "epoch": 0.9021884311106545, "grad_norm": 0.04735315591096878, "learning_rate": 0.01, "loss": 2.0812, "step": 8781 }, { "epoch": 0.9024966608445495, "grad_norm": 0.11621284484863281, "learning_rate": 0.01, "loss": 2.0766, "step": 8784 }, { "epoch": 0.9028048905784445, "grad_norm": 0.11453153938055038, "learning_rate": 0.01, "loss": 2.0866, "step": 8787 }, { "epoch": 0.9031131203123395, "grad_norm": 0.057418763637542725, "learning_rate": 0.01, "loss": 2.081, "step": 8790 }, { "epoch": 0.9034213500462345, "grad_norm": 0.041579000651836395, "learning_rate": 0.01, "loss": 2.1154, "step": 8793 }, { "epoch": 0.9037295797801295, "grad_norm": 0.045673951506614685, "learning_rate": 0.01, "loss": 2.1241, "step": 8796 }, { "epoch": 0.9040378095140245, "grad_norm": 0.05963718518614769, "learning_rate": 0.01, "loss": 2.0955, "step": 8799 }, { "epoch": 0.9043460392479195, "grad_norm": 0.04776541888713837, "learning_rate": 0.01, "loss": 2.1138, "step": 8802 }, { "epoch": 0.9046542689818144, "grad_norm": 0.09103482216596603, "learning_rate": 0.01, "loss": 2.1192, "step": 8805 }, { "epoch": 0.9049624987157094, "grad_norm": 0.09218809008598328, "learning_rate": 0.01, "loss": 2.0985, "step": 8808 }, { "epoch": 0.9052707284496044, "grad_norm": 0.10253725945949554, "learning_rate": 0.01, "loss": 2.1189, "step": 8811 }, { "epoch": 0.9055789581834994, "grad_norm": 0.09638465940952301, "learning_rate": 0.01, "loss": 2.1008, "step": 8814 }, { "epoch": 0.9058871879173944, "grad_norm": 0.0947449579834938, "learning_rate": 0.01, "loss": 2.1222, "step": 8817 }, { "epoch": 0.9061954176512894, "grad_norm": 0.04588090255856514, "learning_rate": 0.01, "loss": 2.1198, "step": 8820 }, { "epoch": 0.9065036473851844, "grad_norm": 0.05041109770536423, "learning_rate": 0.01, "loss": 2.0843, "step": 8823 }, { "epoch": 0.9068118771190794, "grad_norm": 0.038898076862096786, "learning_rate": 0.01, "loss": 2.125, "step": 8826 }, { "epoch": 0.9071201068529744, "grad_norm": 0.03356321156024933, "learning_rate": 0.01, "loss": 2.0985, "step": 8829 }, { "epoch": 0.9074283365868694, "grad_norm": 0.04668448120355606, "learning_rate": 0.01, "loss": 2.1071, "step": 8832 }, { "epoch": 0.9077365663207644, "grad_norm": 0.051277391612529755, "learning_rate": 0.01, "loss": 2.0702, "step": 8835 }, { "epoch": 0.9080447960546594, "grad_norm": 0.049883171916007996, "learning_rate": 0.01, "loss": 2.1111, "step": 8838 }, { "epoch": 0.9083530257885544, "grad_norm": 0.04149313643574715, "learning_rate": 0.01, "loss": 2.0991, "step": 8841 }, { "epoch": 0.9086612555224494, "grad_norm": 0.09206261485815048, "learning_rate": 0.01, "loss": 2.0961, "step": 8844 }, { "epoch": 0.9089694852563444, "grad_norm": 0.1830751895904541, "learning_rate": 0.01, "loss": 2.1093, "step": 8847 }, { "epoch": 0.9092777149902393, "grad_norm": 0.0757865458726883, "learning_rate": 0.01, "loss": 2.115, "step": 8850 }, { "epoch": 0.9095859447241343, "grad_norm": 0.06030673533678055, "learning_rate": 0.01, "loss": 2.0874, "step": 8853 }, { "epoch": 0.9098941744580293, "grad_norm": 0.03440079465508461, "learning_rate": 0.01, "loss": 2.0997, "step": 8856 }, { "epoch": 0.9102024041919243, "grad_norm": 0.040004558861255646, "learning_rate": 0.01, "loss": 2.0767, "step": 8859 }, { "epoch": 0.9105106339258193, "grad_norm": 0.033261023461818695, "learning_rate": 0.01, "loss": 2.0834, "step": 8862 }, { "epoch": 0.9108188636597144, "grad_norm": 0.04814066365361214, "learning_rate": 0.01, "loss": 2.0868, "step": 8865 }, { "epoch": 0.9111270933936094, "grad_norm": 0.04939806088805199, "learning_rate": 0.01, "loss": 2.0944, "step": 8868 }, { "epoch": 0.9114353231275044, "grad_norm": 0.05242007225751877, "learning_rate": 0.01, "loss": 2.1035, "step": 8871 }, { "epoch": 0.9117435528613994, "grad_norm": 0.04576495289802551, "learning_rate": 0.01, "loss": 2.0881, "step": 8874 }, { "epoch": 0.9120517825952944, "grad_norm": 0.0369776152074337, "learning_rate": 0.01, "loss": 2.1017, "step": 8877 }, { "epoch": 0.9123600123291894, "grad_norm": 0.08296829462051392, "learning_rate": 0.01, "loss": 2.1199, "step": 8880 }, { "epoch": 0.9126682420630844, "grad_norm": 0.07186676561832428, "learning_rate": 0.01, "loss": 2.0906, "step": 8883 }, { "epoch": 0.9129764717969794, "grad_norm": 0.06849399209022522, "learning_rate": 0.01, "loss": 2.0944, "step": 8886 }, { "epoch": 0.9132847015308744, "grad_norm": 0.1285102367401123, "learning_rate": 0.01, "loss": 2.0959, "step": 8889 }, { "epoch": 0.9135929312647694, "grad_norm": 0.045700203627347946, "learning_rate": 0.01, "loss": 2.0924, "step": 8892 }, { "epoch": 0.9139011609986644, "grad_norm": 0.04561945050954819, "learning_rate": 0.01, "loss": 2.1126, "step": 8895 }, { "epoch": 0.9142093907325594, "grad_norm": 0.0417817123234272, "learning_rate": 0.01, "loss": 2.0692, "step": 8898 }, { "epoch": 0.9145176204664544, "grad_norm": 0.07923369109630585, "learning_rate": 0.01, "loss": 2.1059, "step": 8901 }, { "epoch": 0.9148258502003493, "grad_norm": 0.052836060523986816, "learning_rate": 0.01, "loss": 2.1089, "step": 8904 }, { "epoch": 0.9151340799342443, "grad_norm": 0.04591790586709976, "learning_rate": 0.01, "loss": 2.1007, "step": 8907 }, { "epoch": 0.9154423096681393, "grad_norm": 0.09871240705251694, "learning_rate": 0.01, "loss": 2.0718, "step": 8910 }, { "epoch": 0.9157505394020343, "grad_norm": 0.044554613530635834, "learning_rate": 0.01, "loss": 2.0956, "step": 8913 }, { "epoch": 0.9160587691359293, "grad_norm": 0.10009585320949554, "learning_rate": 0.01, "loss": 2.0838, "step": 8916 }, { "epoch": 0.9163669988698243, "grad_norm": 0.07252159714698792, "learning_rate": 0.01, "loss": 2.0973, "step": 8919 }, { "epoch": 0.9166752286037193, "grad_norm": 0.09162852168083191, "learning_rate": 0.01, "loss": 2.0961, "step": 8922 }, { "epoch": 0.9169834583376143, "grad_norm": 0.06149733439087868, "learning_rate": 0.01, "loss": 2.1377, "step": 8925 }, { "epoch": 0.9172916880715093, "grad_norm": 0.09315814077854156, "learning_rate": 0.01, "loss": 2.0901, "step": 8928 }, { "epoch": 0.9175999178054043, "grad_norm": 0.056877728551626205, "learning_rate": 0.01, "loss": 2.0934, "step": 8931 }, { "epoch": 0.9179081475392993, "grad_norm": 0.0976705476641655, "learning_rate": 0.01, "loss": 2.0791, "step": 8934 }, { "epoch": 0.9182163772731943, "grad_norm": 0.0493176206946373, "learning_rate": 0.01, "loss": 2.0937, "step": 8937 }, { "epoch": 0.9185246070070893, "grad_norm": 0.06268187612295151, "learning_rate": 0.01, "loss": 2.1053, "step": 8940 }, { "epoch": 0.9188328367409843, "grad_norm": 0.049251820892095566, "learning_rate": 0.01, "loss": 2.1104, "step": 8943 }, { "epoch": 0.9191410664748793, "grad_norm": 0.05342431366443634, "learning_rate": 0.01, "loss": 2.1005, "step": 8946 }, { "epoch": 0.9194492962087742, "grad_norm": 0.036090634763240814, "learning_rate": 0.01, "loss": 2.0815, "step": 8949 }, { "epoch": 0.9197575259426692, "grad_norm": 0.0320359505712986, "learning_rate": 0.01, "loss": 2.0704, "step": 8952 }, { "epoch": 0.9200657556765642, "grad_norm": 0.03514352813363075, "learning_rate": 0.01, "loss": 2.1046, "step": 8955 }, { "epoch": 0.9203739854104592, "grad_norm": 0.06132291629910469, "learning_rate": 0.01, "loss": 2.0887, "step": 8958 }, { "epoch": 0.9206822151443542, "grad_norm": 0.07312822341918945, "learning_rate": 0.01, "loss": 2.1079, "step": 8961 }, { "epoch": 0.9209904448782492, "grad_norm": 0.09670150279998779, "learning_rate": 0.01, "loss": 2.1195, "step": 8964 }, { "epoch": 0.9212986746121442, "grad_norm": 0.1106385663151741, "learning_rate": 0.01, "loss": 2.0809, "step": 8967 }, { "epoch": 0.9216069043460392, "grad_norm": 0.05964332073926926, "learning_rate": 0.01, "loss": 2.1108, "step": 8970 }, { "epoch": 0.9219151340799342, "grad_norm": 0.05584556236863136, "learning_rate": 0.01, "loss": 2.1274, "step": 8973 }, { "epoch": 0.9222233638138292, "grad_norm": 0.04485652595758438, "learning_rate": 0.01, "loss": 2.0627, "step": 8976 }, { "epoch": 0.9225315935477242, "grad_norm": 0.07286686450242996, "learning_rate": 0.01, "loss": 2.1087, "step": 8979 }, { "epoch": 0.9228398232816193, "grad_norm": 0.10815869271755219, "learning_rate": 0.01, "loss": 2.1057, "step": 8982 }, { "epoch": 0.9231480530155143, "grad_norm": 0.1037832722067833, "learning_rate": 0.01, "loss": 2.0836, "step": 8985 }, { "epoch": 0.9234562827494093, "grad_norm": 0.08297618478536606, "learning_rate": 0.01, "loss": 2.1181, "step": 8988 }, { "epoch": 0.9237645124833043, "grad_norm": 0.04203306511044502, "learning_rate": 0.01, "loss": 2.1112, "step": 8991 }, { "epoch": 0.9240727422171993, "grad_norm": 0.06641580909490585, "learning_rate": 0.01, "loss": 2.1004, "step": 8994 }, { "epoch": 0.9243809719510943, "grad_norm": 0.04921744763851166, "learning_rate": 0.01, "loss": 2.1116, "step": 8997 }, { "epoch": 0.9246892016849892, "grad_norm": 0.03472235053777695, "learning_rate": 0.01, "loss": 2.0777, "step": 9000 }, { "epoch": 0.9249974314188842, "grad_norm": 0.03650922700762749, "learning_rate": 0.01, "loss": 2.0802, "step": 9003 }, { "epoch": 0.9253056611527792, "grad_norm": 0.04657342657446861, "learning_rate": 0.01, "loss": 2.0773, "step": 9006 }, { "epoch": 0.9256138908866742, "grad_norm": 0.05943501368165016, "learning_rate": 0.01, "loss": 2.0753, "step": 9009 }, { "epoch": 0.9259221206205692, "grad_norm": 0.04763554409146309, "learning_rate": 0.01, "loss": 2.0959, "step": 9012 }, { "epoch": 0.9262303503544642, "grad_norm": 0.1267511248588562, "learning_rate": 0.01, "loss": 2.0971, "step": 9015 }, { "epoch": 0.9265385800883592, "grad_norm": 0.055529460310935974, "learning_rate": 0.01, "loss": 2.1327, "step": 9018 }, { "epoch": 0.9268468098222542, "grad_norm": 0.15508927404880524, "learning_rate": 0.01, "loss": 2.0947, "step": 9021 }, { "epoch": 0.9271550395561492, "grad_norm": 0.0593777671456337, "learning_rate": 0.01, "loss": 2.1171, "step": 9024 }, { "epoch": 0.9274632692900442, "grad_norm": 0.08907107263803482, "learning_rate": 0.01, "loss": 2.093, "step": 9027 }, { "epoch": 0.9277714990239392, "grad_norm": 0.07041808217763901, "learning_rate": 0.01, "loss": 2.0676, "step": 9030 }, { "epoch": 0.9280797287578342, "grad_norm": 0.03434208780527115, "learning_rate": 0.01, "loss": 2.0928, "step": 9033 }, { "epoch": 0.9283879584917292, "grad_norm": 0.07591548562049866, "learning_rate": 0.01, "loss": 2.0857, "step": 9036 }, { "epoch": 0.9286961882256242, "grad_norm": 0.08999443799257278, "learning_rate": 0.01, "loss": 2.0984, "step": 9039 }, { "epoch": 0.9290044179595192, "grad_norm": 0.11046464741230011, "learning_rate": 0.01, "loss": 2.1009, "step": 9042 }, { "epoch": 0.9293126476934142, "grad_norm": 0.08271370083093643, "learning_rate": 0.01, "loss": 2.1027, "step": 9045 }, { "epoch": 0.9296208774273091, "grad_norm": 0.046337299048900604, "learning_rate": 0.01, "loss": 2.0826, "step": 9048 }, { "epoch": 0.9299291071612041, "grad_norm": 0.037284769117832184, "learning_rate": 0.01, "loss": 2.1015, "step": 9051 }, { "epoch": 0.9302373368950991, "grad_norm": 0.04956496134400368, "learning_rate": 0.01, "loss": 2.1036, "step": 9054 }, { "epoch": 0.9305455666289941, "grad_norm": 0.12329571694135666, "learning_rate": 0.01, "loss": 2.0917, "step": 9057 }, { "epoch": 0.9308537963628891, "grad_norm": 0.06971380859613419, "learning_rate": 0.01, "loss": 2.114, "step": 9060 }, { "epoch": 0.9311620260967841, "grad_norm": 0.06084508076310158, "learning_rate": 0.01, "loss": 2.1122, "step": 9063 }, { "epoch": 0.9314702558306791, "grad_norm": 0.049602411687374115, "learning_rate": 0.01, "loss": 2.1268, "step": 9066 }, { "epoch": 0.9317784855645741, "grad_norm": 0.05200349539518356, "learning_rate": 0.01, "loss": 2.0979, "step": 9069 }, { "epoch": 0.9320867152984691, "grad_norm": 0.05793909728527069, "learning_rate": 0.01, "loss": 2.096, "step": 9072 }, { "epoch": 0.9323949450323641, "grad_norm": 0.10819883644580841, "learning_rate": 0.01, "loss": 2.1096, "step": 9075 }, { "epoch": 0.9327031747662591, "grad_norm": 0.07809442281723022, "learning_rate": 0.01, "loss": 2.0968, "step": 9078 }, { "epoch": 0.9330114045001541, "grad_norm": 0.09595733880996704, "learning_rate": 0.01, "loss": 2.0769, "step": 9081 }, { "epoch": 0.9333196342340491, "grad_norm": 0.11658616364002228, "learning_rate": 0.01, "loss": 2.0945, "step": 9084 }, { "epoch": 0.9336278639679441, "grad_norm": 0.07642678171396255, "learning_rate": 0.01, "loss": 2.0811, "step": 9087 }, { "epoch": 0.933936093701839, "grad_norm": 0.03174865245819092, "learning_rate": 0.01, "loss": 2.1017, "step": 9090 }, { "epoch": 0.934244323435734, "grad_norm": 0.05137626454234123, "learning_rate": 0.01, "loss": 2.0878, "step": 9093 }, { "epoch": 0.9345525531696292, "grad_norm": 0.05306951329112053, "learning_rate": 0.01, "loss": 2.1163, "step": 9096 }, { "epoch": 0.9348607829035241, "grad_norm": 0.0716642439365387, "learning_rate": 0.01, "loss": 2.0903, "step": 9099 }, { "epoch": 0.9351690126374191, "grad_norm": 0.10328514873981476, "learning_rate": 0.01, "loss": 2.0789, "step": 9102 }, { "epoch": 0.9354772423713141, "grad_norm": 0.04914560168981552, "learning_rate": 0.01, "loss": 2.0963, "step": 9105 }, { "epoch": 0.9357854721052091, "grad_norm": 0.04810576140880585, "learning_rate": 0.01, "loss": 2.1119, "step": 9108 }, { "epoch": 0.9360937018391041, "grad_norm": 0.05689787119626999, "learning_rate": 0.01, "loss": 2.0955, "step": 9111 }, { "epoch": 0.9364019315729991, "grad_norm": 0.06455382704734802, "learning_rate": 0.01, "loss": 2.0894, "step": 9114 }, { "epoch": 0.9367101613068941, "grad_norm": 0.044911760836839676, "learning_rate": 0.01, "loss": 2.0967, "step": 9117 }, { "epoch": 0.9370183910407891, "grad_norm": 0.06244887784123421, "learning_rate": 0.01, "loss": 2.0921, "step": 9120 }, { "epoch": 0.9373266207746841, "grad_norm": 0.052621614187955856, "learning_rate": 0.01, "loss": 2.1296, "step": 9123 }, { "epoch": 0.9376348505085791, "grad_norm": 0.05098232626914978, "learning_rate": 0.01, "loss": 2.0807, "step": 9126 }, { "epoch": 0.9379430802424741, "grad_norm": 0.058582011610269547, "learning_rate": 0.01, "loss": 2.0973, "step": 9129 }, { "epoch": 0.9382513099763691, "grad_norm": 0.10984500497579575, "learning_rate": 0.01, "loss": 2.0789, "step": 9132 }, { "epoch": 0.9385595397102641, "grad_norm": 0.045173123478889465, "learning_rate": 0.01, "loss": 2.0937, "step": 9135 }, { "epoch": 0.9388677694441591, "grad_norm": 0.06749478727579117, "learning_rate": 0.01, "loss": 2.1051, "step": 9138 }, { "epoch": 0.939175999178054, "grad_norm": 0.06236808001995087, "learning_rate": 0.01, "loss": 2.1099, "step": 9141 }, { "epoch": 0.939484228911949, "grad_norm": 0.06205837428569794, "learning_rate": 0.01, "loss": 2.0893, "step": 9144 }, { "epoch": 0.939792458645844, "grad_norm": 0.0742972195148468, "learning_rate": 0.01, "loss": 2.1034, "step": 9147 }, { "epoch": 0.940100688379739, "grad_norm": 0.06998419016599655, "learning_rate": 0.01, "loss": 2.0558, "step": 9150 }, { "epoch": 0.940408918113634, "grad_norm": 0.04214362055063248, "learning_rate": 0.01, "loss": 2.0968, "step": 9153 }, { "epoch": 0.940717147847529, "grad_norm": 0.055913276970386505, "learning_rate": 0.01, "loss": 2.0736, "step": 9156 }, { "epoch": 0.941025377581424, "grad_norm": 0.0941486805677414, "learning_rate": 0.01, "loss": 2.1038, "step": 9159 }, { "epoch": 0.941333607315319, "grad_norm": 0.05609782040119171, "learning_rate": 0.01, "loss": 2.096, "step": 9162 }, { "epoch": 0.941641837049214, "grad_norm": 0.05714662745594978, "learning_rate": 0.01, "loss": 2.0939, "step": 9165 }, { "epoch": 0.941950066783109, "grad_norm": 0.05364496633410454, "learning_rate": 0.01, "loss": 2.0838, "step": 9168 }, { "epoch": 0.942258296517004, "grad_norm": 0.050090350210666656, "learning_rate": 0.01, "loss": 2.087, "step": 9171 }, { "epoch": 0.942566526250899, "grad_norm": 0.07287559658288956, "learning_rate": 0.01, "loss": 2.098, "step": 9174 }, { "epoch": 0.942874755984794, "grad_norm": 0.04061901941895485, "learning_rate": 0.01, "loss": 2.0677, "step": 9177 }, { "epoch": 0.943182985718689, "grad_norm": 0.10750306397676468, "learning_rate": 0.01, "loss": 2.1105, "step": 9180 }, { "epoch": 0.943491215452584, "grad_norm": 0.10353365540504456, "learning_rate": 0.01, "loss": 2.0712, "step": 9183 }, { "epoch": 0.943799445186479, "grad_norm": 0.07502592355012894, "learning_rate": 0.01, "loss": 2.1115, "step": 9186 }, { "epoch": 0.944107674920374, "grad_norm": 0.046962104737758636, "learning_rate": 0.01, "loss": 2.0937, "step": 9189 }, { "epoch": 0.944415904654269, "grad_norm": 0.05084332078695297, "learning_rate": 0.01, "loss": 2.0943, "step": 9192 }, { "epoch": 0.9447241343881639, "grad_norm": 0.0458371527493, "learning_rate": 0.01, "loss": 2.0967, "step": 9195 }, { "epoch": 0.9450323641220589, "grad_norm": 0.040458545088768005, "learning_rate": 0.01, "loss": 2.0949, "step": 9198 }, { "epoch": 0.9453405938559539, "grad_norm": 0.046158358454704285, "learning_rate": 0.01, "loss": 2.0912, "step": 9201 }, { "epoch": 0.9456488235898489, "grad_norm": 0.10080043226480484, "learning_rate": 0.01, "loss": 2.1, "step": 9204 }, { "epoch": 0.9459570533237439, "grad_norm": 0.07679333537817001, "learning_rate": 0.01, "loss": 2.1013, "step": 9207 }, { "epoch": 0.9462652830576389, "grad_norm": 0.07189175486564636, "learning_rate": 0.01, "loss": 2.1008, "step": 9210 }, { "epoch": 0.946573512791534, "grad_norm": 0.07828579097986221, "learning_rate": 0.01, "loss": 2.1063, "step": 9213 }, { "epoch": 0.946881742525429, "grad_norm": 0.07649674266576767, "learning_rate": 0.01, "loss": 2.1146, "step": 9216 }, { "epoch": 0.947189972259324, "grad_norm": 0.06558651477098465, "learning_rate": 0.01, "loss": 2.0705, "step": 9219 }, { "epoch": 0.947498201993219, "grad_norm": 0.03276702016592026, "learning_rate": 0.01, "loss": 2.1065, "step": 9222 }, { "epoch": 0.947806431727114, "grad_norm": 0.03779645636677742, "learning_rate": 0.01, "loss": 2.0924, "step": 9225 }, { "epoch": 0.948114661461009, "grad_norm": 0.048466913402080536, "learning_rate": 0.01, "loss": 2.1037, "step": 9228 }, { "epoch": 0.948422891194904, "grad_norm": 0.04391203075647354, "learning_rate": 0.01, "loss": 2.0722, "step": 9231 }, { "epoch": 0.948731120928799, "grad_norm": 0.11353743076324463, "learning_rate": 0.01, "loss": 2.113, "step": 9234 }, { "epoch": 0.949039350662694, "grad_norm": 0.045930709689855576, "learning_rate": 0.01, "loss": 2.0699, "step": 9237 }, { "epoch": 0.949347580396589, "grad_norm": 0.06440164893865585, "learning_rate": 0.01, "loss": 2.0786, "step": 9240 }, { "epoch": 0.949655810130484, "grad_norm": 0.08666238933801651, "learning_rate": 0.01, "loss": 2.1049, "step": 9243 }, { "epoch": 0.9499640398643789, "grad_norm": 0.11012524366378784, "learning_rate": 0.01, "loss": 2.1018, "step": 9246 }, { "epoch": 0.9502722695982739, "grad_norm": 0.047307875007390976, "learning_rate": 0.01, "loss": 2.0943, "step": 9249 }, { "epoch": 0.9505804993321689, "grad_norm": 0.04565277695655823, "learning_rate": 0.01, "loss": 2.1174, "step": 9252 }, { "epoch": 0.9508887290660639, "grad_norm": 0.03389623388648033, "learning_rate": 0.01, "loss": 2.0896, "step": 9255 }, { "epoch": 0.9511969587999589, "grad_norm": 0.04582008346915245, "learning_rate": 0.01, "loss": 2.0888, "step": 9258 }, { "epoch": 0.9515051885338539, "grad_norm": 0.07722247391939163, "learning_rate": 0.01, "loss": 2.0843, "step": 9261 }, { "epoch": 0.9518134182677489, "grad_norm": 0.03505149856209755, "learning_rate": 0.01, "loss": 2.0903, "step": 9264 }, { "epoch": 0.9521216480016439, "grad_norm": 0.08010539412498474, "learning_rate": 0.01, "loss": 2.1249, "step": 9267 }, { "epoch": 0.9524298777355389, "grad_norm": 0.0723007321357727, "learning_rate": 0.01, "loss": 2.0951, "step": 9270 }, { "epoch": 0.9527381074694339, "grad_norm": 0.05629736930131912, "learning_rate": 0.01, "loss": 2.0948, "step": 9273 }, { "epoch": 0.9530463372033289, "grad_norm": 0.05514506623148918, "learning_rate": 0.01, "loss": 2.1214, "step": 9276 }, { "epoch": 0.9533545669372239, "grad_norm": 0.1107834130525589, "learning_rate": 0.01, "loss": 2.0876, "step": 9279 }, { "epoch": 0.9536627966711189, "grad_norm": 0.046309590339660645, "learning_rate": 0.01, "loss": 2.0669, "step": 9282 }, { "epoch": 0.9539710264050139, "grad_norm": 0.06956466287374496, "learning_rate": 0.01, "loss": 2.0903, "step": 9285 }, { "epoch": 0.9542792561389088, "grad_norm": 0.086011603474617, "learning_rate": 0.01, "loss": 2.0896, "step": 9288 }, { "epoch": 0.9545874858728038, "grad_norm": 0.04768074303865433, "learning_rate": 0.01, "loss": 2.0923, "step": 9291 }, { "epoch": 0.9548957156066988, "grad_norm": 0.0958017110824585, "learning_rate": 0.01, "loss": 2.1134, "step": 9294 }, { "epoch": 0.9552039453405938, "grad_norm": 0.06098558008670807, "learning_rate": 0.01, "loss": 2.0775, "step": 9297 }, { "epoch": 0.9555121750744888, "grad_norm": 0.05258086323738098, "learning_rate": 0.01, "loss": 2.0998, "step": 9300 }, { "epoch": 0.9558204048083838, "grad_norm": 0.06664231419563293, "learning_rate": 0.01, "loss": 2.1215, "step": 9303 }, { "epoch": 0.9561286345422788, "grad_norm": 0.05491488054394722, "learning_rate": 0.01, "loss": 2.0837, "step": 9306 }, { "epoch": 0.9564368642761738, "grad_norm": 0.0436725877225399, "learning_rate": 0.01, "loss": 2.1268, "step": 9309 }, { "epoch": 0.9567450940100688, "grad_norm": 0.08737560361623764, "learning_rate": 0.01, "loss": 2.0901, "step": 9312 }, { "epoch": 0.9570533237439638, "grad_norm": 0.08130110800266266, "learning_rate": 0.01, "loss": 2.0766, "step": 9315 }, { "epoch": 0.9573615534778588, "grad_norm": 0.07826768606901169, "learning_rate": 0.01, "loss": 2.0836, "step": 9318 }, { "epoch": 0.9576697832117538, "grad_norm": 0.09330857545137405, "learning_rate": 0.01, "loss": 2.0794, "step": 9321 }, { "epoch": 0.9579780129456488, "grad_norm": 0.03914652019739151, "learning_rate": 0.01, "loss": 2.102, "step": 9324 }, { "epoch": 0.9582862426795438, "grad_norm": 0.03853154182434082, "learning_rate": 0.01, "loss": 2.0915, "step": 9327 }, { "epoch": 0.9585944724134389, "grad_norm": 0.07349935919046402, "learning_rate": 0.01, "loss": 2.0856, "step": 9330 }, { "epoch": 0.9589027021473339, "grad_norm": 0.1473885178565979, "learning_rate": 0.01, "loss": 2.0904, "step": 9333 }, { "epoch": 0.9592109318812289, "grad_norm": 0.11091527342796326, "learning_rate": 0.01, "loss": 2.0934, "step": 9336 }, { "epoch": 0.9595191616151239, "grad_norm": 0.0400085523724556, "learning_rate": 0.01, "loss": 2.0924, "step": 9339 }, { "epoch": 0.9598273913490188, "grad_norm": 0.05025499314069748, "learning_rate": 0.01, "loss": 2.0845, "step": 9342 }, { "epoch": 0.9601356210829138, "grad_norm": 0.03745681792497635, "learning_rate": 0.01, "loss": 2.1006, "step": 9345 }, { "epoch": 0.9604438508168088, "grad_norm": 0.05147318169474602, "learning_rate": 0.01, "loss": 2.0912, "step": 9348 }, { "epoch": 0.9607520805507038, "grad_norm": 0.06338364630937576, "learning_rate": 0.01, "loss": 2.1169, "step": 9351 }, { "epoch": 0.9610603102845988, "grad_norm": 0.09458258748054504, "learning_rate": 0.01, "loss": 2.1005, "step": 9354 }, { "epoch": 0.9613685400184938, "grad_norm": 0.09883291274309158, "learning_rate": 0.01, "loss": 2.0934, "step": 9357 }, { "epoch": 0.9616767697523888, "grad_norm": 0.048908524215221405, "learning_rate": 0.01, "loss": 2.0863, "step": 9360 }, { "epoch": 0.9619849994862838, "grad_norm": 0.11762084811925888, "learning_rate": 0.01, "loss": 2.1182, "step": 9363 }, { "epoch": 0.9622932292201788, "grad_norm": 0.0835133045911789, "learning_rate": 0.01, "loss": 2.0728, "step": 9366 }, { "epoch": 0.9626014589540738, "grad_norm": 0.0580466203391552, "learning_rate": 0.01, "loss": 2.0756, "step": 9369 }, { "epoch": 0.9629096886879688, "grad_norm": 0.051043394953012466, "learning_rate": 0.01, "loss": 2.0936, "step": 9372 }, { "epoch": 0.9632179184218638, "grad_norm": 0.1081843450665474, "learning_rate": 0.01, "loss": 2.107, "step": 9375 }, { "epoch": 0.9635261481557588, "grad_norm": 0.04656577482819557, "learning_rate": 0.01, "loss": 2.1084, "step": 9378 }, { "epoch": 0.9638343778896538, "grad_norm": 0.03988798335194588, "learning_rate": 0.01, "loss": 2.1015, "step": 9381 }, { "epoch": 0.9641426076235488, "grad_norm": 0.07686126232147217, "learning_rate": 0.01, "loss": 2.1417, "step": 9384 }, { "epoch": 0.9644508373574437, "grad_norm": 0.057407401502132416, "learning_rate": 0.01, "loss": 2.1191, "step": 9387 }, { "epoch": 0.9647590670913387, "grad_norm": 0.0947386845946312, "learning_rate": 0.01, "loss": 2.0796, "step": 9390 }, { "epoch": 0.9650672968252337, "grad_norm": 0.05064699798822403, "learning_rate": 0.01, "loss": 2.1001, "step": 9393 }, { "epoch": 0.9653755265591287, "grad_norm": 0.04948986694216728, "learning_rate": 0.01, "loss": 2.0736, "step": 9396 }, { "epoch": 0.9656837562930237, "grad_norm": 0.10736438632011414, "learning_rate": 0.01, "loss": 2.0939, "step": 9399 }, { "epoch": 0.9659919860269187, "grad_norm": 0.039317477494478226, "learning_rate": 0.01, "loss": 2.1077, "step": 9402 }, { "epoch": 0.9663002157608137, "grad_norm": 0.06933067739009857, "learning_rate": 0.01, "loss": 2.1056, "step": 9405 }, { "epoch": 0.9666084454947087, "grad_norm": 0.03649623692035675, "learning_rate": 0.01, "loss": 2.0838, "step": 9408 }, { "epoch": 0.9669166752286037, "grad_norm": 0.09309684485197067, "learning_rate": 0.01, "loss": 2.0913, "step": 9411 }, { "epoch": 0.9672249049624987, "grad_norm": 0.11532922834157944, "learning_rate": 0.01, "loss": 2.1127, "step": 9414 }, { "epoch": 0.9675331346963937, "grad_norm": 0.053582970052957535, "learning_rate": 0.01, "loss": 2.0812, "step": 9417 }, { "epoch": 0.9678413644302887, "grad_norm": 0.07581201195716858, "learning_rate": 0.01, "loss": 2.1148, "step": 9420 }, { "epoch": 0.9681495941641837, "grad_norm": 0.051002178341150284, "learning_rate": 0.01, "loss": 2.0834, "step": 9423 }, { "epoch": 0.9684578238980787, "grad_norm": 0.06385383754968643, "learning_rate": 0.01, "loss": 2.0826, "step": 9426 }, { "epoch": 0.9687660536319737, "grad_norm": 0.10576994717121124, "learning_rate": 0.01, "loss": 2.0768, "step": 9429 }, { "epoch": 0.9690742833658686, "grad_norm": 0.054983410984277725, "learning_rate": 0.01, "loss": 2.0604, "step": 9432 }, { "epoch": 0.9693825130997636, "grad_norm": 0.09159716218709946, "learning_rate": 0.01, "loss": 2.0613, "step": 9435 }, { "epoch": 0.9696907428336586, "grad_norm": 0.07718406617641449, "learning_rate": 0.01, "loss": 2.1132, "step": 9438 }, { "epoch": 0.9699989725675536, "grad_norm": 0.0788009986281395, "learning_rate": 0.01, "loss": 2.0887, "step": 9441 }, { "epoch": 0.9703072023014487, "grad_norm": 0.040717653930187225, "learning_rate": 0.01, "loss": 2.084, "step": 9444 }, { "epoch": 0.9706154320353437, "grad_norm": 0.09677381813526154, "learning_rate": 0.01, "loss": 2.0903, "step": 9447 }, { "epoch": 0.9709236617692387, "grad_norm": 0.0706525593996048, "learning_rate": 0.01, "loss": 2.0647, "step": 9450 }, { "epoch": 0.9712318915031337, "grad_norm": 0.04624510183930397, "learning_rate": 0.01, "loss": 2.0818, "step": 9453 }, { "epoch": 0.9715401212370287, "grad_norm": 0.04585500434041023, "learning_rate": 0.01, "loss": 2.0927, "step": 9456 }, { "epoch": 0.9718483509709237, "grad_norm": 0.03468145430088043, "learning_rate": 0.01, "loss": 2.0759, "step": 9459 }, { "epoch": 0.9721565807048187, "grad_norm": 0.06956649571657181, "learning_rate": 0.01, "loss": 2.092, "step": 9462 }, { "epoch": 0.9724648104387137, "grad_norm": 0.04509080946445465, "learning_rate": 0.01, "loss": 2.1095, "step": 9465 }, { "epoch": 0.9727730401726087, "grad_norm": 0.09959586709737778, "learning_rate": 0.01, "loss": 2.0921, "step": 9468 }, { "epoch": 0.9730812699065037, "grad_norm": 0.08427727967500687, "learning_rate": 0.01, "loss": 2.1031, "step": 9471 }, { "epoch": 0.9733894996403987, "grad_norm": 0.14798741042613983, "learning_rate": 0.01, "loss": 2.091, "step": 9474 }, { "epoch": 0.9736977293742937, "grad_norm": 0.057735662907361984, "learning_rate": 0.01, "loss": 2.0701, "step": 9477 }, { "epoch": 0.9740059591081887, "grad_norm": 0.04484837129712105, "learning_rate": 0.01, "loss": 2.1015, "step": 9480 }, { "epoch": 0.9743141888420837, "grad_norm": 0.04166285693645477, "learning_rate": 0.01, "loss": 2.1021, "step": 9483 }, { "epoch": 0.9746224185759786, "grad_norm": 0.05640358105301857, "learning_rate": 0.01, "loss": 2.0925, "step": 9486 }, { "epoch": 0.9749306483098736, "grad_norm": 0.040314216166734695, "learning_rate": 0.01, "loss": 2.0797, "step": 9489 }, { "epoch": 0.9752388780437686, "grad_norm": 0.04522860422730446, "learning_rate": 0.01, "loss": 2.0935, "step": 9492 }, { "epoch": 0.9755471077776636, "grad_norm": 0.03492886200547218, "learning_rate": 0.01, "loss": 2.0968, "step": 9495 }, { "epoch": 0.9758553375115586, "grad_norm": 0.03252703323960304, "learning_rate": 0.01, "loss": 2.125, "step": 9498 }, { "epoch": 0.9761635672454536, "grad_norm": 0.04002056270837784, "learning_rate": 0.01, "loss": 2.0651, "step": 9501 }, { "epoch": 0.9764717969793486, "grad_norm": 0.07364718616008759, "learning_rate": 0.01, "loss": 2.0629, "step": 9504 }, { "epoch": 0.9767800267132436, "grad_norm": 0.05577448755502701, "learning_rate": 0.01, "loss": 2.0726, "step": 9507 }, { "epoch": 0.9770882564471386, "grad_norm": 0.13259132206439972, "learning_rate": 0.01, "loss": 2.1075, "step": 9510 }, { "epoch": 0.9773964861810336, "grad_norm": 0.06911557912826538, "learning_rate": 0.01, "loss": 2.0887, "step": 9513 }, { "epoch": 0.9777047159149286, "grad_norm": 0.10592345148324966, "learning_rate": 0.01, "loss": 2.0982, "step": 9516 }, { "epoch": 0.9780129456488236, "grad_norm": 0.05682144686579704, "learning_rate": 0.01, "loss": 2.0961, "step": 9519 }, { "epoch": 0.9783211753827186, "grad_norm": 0.07456633448600769, "learning_rate": 0.01, "loss": 2.0983, "step": 9522 }, { "epoch": 0.9786294051166136, "grad_norm": 0.062031425535678864, "learning_rate": 0.01, "loss": 2.1163, "step": 9525 }, { "epoch": 0.9789376348505086, "grad_norm": 0.0570233091711998, "learning_rate": 0.01, "loss": 2.1046, "step": 9528 }, { "epoch": 0.9792458645844035, "grad_norm": 0.04668619483709335, "learning_rate": 0.01, "loss": 2.081, "step": 9531 }, { "epoch": 0.9795540943182985, "grad_norm": 0.04718153178691864, "learning_rate": 0.01, "loss": 2.0678, "step": 9534 }, { "epoch": 0.9798623240521935, "grad_norm": 0.054066251963377, "learning_rate": 0.01, "loss": 2.0911, "step": 9537 }, { "epoch": 0.9801705537860885, "grad_norm": 0.1274210512638092, "learning_rate": 0.01, "loss": 2.097, "step": 9540 }, { "epoch": 0.9804787835199835, "grad_norm": 0.07543773949146271, "learning_rate": 0.01, "loss": 2.0824, "step": 9543 }, { "epoch": 0.9807870132538785, "grad_norm": 0.07845018804073334, "learning_rate": 0.01, "loss": 2.0749, "step": 9546 }, { "epoch": 0.9810952429877735, "grad_norm": 0.08444254100322723, "learning_rate": 0.01, "loss": 2.1019, "step": 9549 }, { "epoch": 0.9814034727216685, "grad_norm": 0.07719142735004425, "learning_rate": 0.01, "loss": 2.0811, "step": 9552 }, { "epoch": 0.9817117024555635, "grad_norm": 0.05624673515558243, "learning_rate": 0.01, "loss": 2.0752, "step": 9555 }, { "epoch": 0.9820199321894585, "grad_norm": 0.0419309176504612, "learning_rate": 0.01, "loss": 2.0812, "step": 9558 }, { "epoch": 0.9823281619233536, "grad_norm": 0.0343257375061512, "learning_rate": 0.01, "loss": 2.0694, "step": 9561 }, { "epoch": 0.9826363916572486, "grad_norm": 0.059452395886182785, "learning_rate": 0.01, "loss": 2.0521, "step": 9564 }, { "epoch": 0.9829446213911436, "grad_norm": 0.09073518216609955, "learning_rate": 0.01, "loss": 2.0636, "step": 9567 }, { "epoch": 0.9832528511250386, "grad_norm": 0.10660509765148163, "learning_rate": 0.01, "loss": 2.0796, "step": 9570 }, { "epoch": 0.9835610808589336, "grad_norm": 0.04380667209625244, "learning_rate": 0.01, "loss": 2.0992, "step": 9573 }, { "epoch": 0.9838693105928286, "grad_norm": 0.06383811682462692, "learning_rate": 0.01, "loss": 2.0722, "step": 9576 }, { "epoch": 0.9841775403267236, "grad_norm": 0.07926032692193985, "learning_rate": 0.01, "loss": 2.0571, "step": 9579 }, { "epoch": 0.9844857700606185, "grad_norm": 0.05310386046767235, "learning_rate": 0.01, "loss": 2.0739, "step": 9582 }, { "epoch": 0.9847939997945135, "grad_norm": 0.03591843321919441, "learning_rate": 0.01, "loss": 2.0757, "step": 9585 }, { "epoch": 0.9851022295284085, "grad_norm": 0.04773431271314621, "learning_rate": 0.01, "loss": 2.0525, "step": 9588 }, { "epoch": 0.9854104592623035, "grad_norm": 0.04679710045456886, "learning_rate": 0.01, "loss": 2.0771, "step": 9591 }, { "epoch": 0.9857186889961985, "grad_norm": 0.05671774223446846, "learning_rate": 0.01, "loss": 2.1106, "step": 9594 }, { "epoch": 0.9860269187300935, "grad_norm": 0.049488577991724014, "learning_rate": 0.01, "loss": 2.0695, "step": 9597 }, { "epoch": 0.9863351484639885, "grad_norm": 0.04207129031419754, "learning_rate": 0.01, "loss": 2.0903, "step": 9600 }, { "epoch": 0.9866433781978835, "grad_norm": 0.10019747167825699, "learning_rate": 0.01, "loss": 2.073, "step": 9603 }, { "epoch": 0.9869516079317785, "grad_norm": 0.051381729543209076, "learning_rate": 0.01, "loss": 2.0626, "step": 9606 }, { "epoch": 0.9872598376656735, "grad_norm": 0.13477744162082672, "learning_rate": 0.01, "loss": 2.1098, "step": 9609 }, { "epoch": 0.9875680673995685, "grad_norm": 0.09002148360013962, "learning_rate": 0.01, "loss": 2.0927, "step": 9612 }, { "epoch": 0.9878762971334635, "grad_norm": 0.05230112001299858, "learning_rate": 0.01, "loss": 2.0902, "step": 9615 }, { "epoch": 0.9881845268673585, "grad_norm": 0.0639885738492012, "learning_rate": 0.01, "loss": 2.1179, "step": 9618 }, { "epoch": 0.9884927566012535, "grad_norm": 0.0553070530295372, "learning_rate": 0.01, "loss": 2.0923, "step": 9621 }, { "epoch": 0.9888009863351485, "grad_norm": 0.04541468620300293, "learning_rate": 0.01, "loss": 2.0965, "step": 9624 }, { "epoch": 0.9891092160690435, "grad_norm": 0.08656930178403854, "learning_rate": 0.01, "loss": 2.1038, "step": 9627 }, { "epoch": 0.9894174458029384, "grad_norm": 0.04954921826720238, "learning_rate": 0.01, "loss": 2.0759, "step": 9630 }, { "epoch": 0.9897256755368334, "grad_norm": 0.07971720397472382, "learning_rate": 0.01, "loss": 2.0837, "step": 9633 }, { "epoch": 0.9900339052707284, "grad_norm": 0.12388944625854492, "learning_rate": 0.01, "loss": 2.1181, "step": 9636 }, { "epoch": 0.9903421350046234, "grad_norm": 0.040693242102861404, "learning_rate": 0.01, "loss": 2.0806, "step": 9639 }, { "epoch": 0.9906503647385184, "grad_norm": 0.032711997628211975, "learning_rate": 0.01, "loss": 2.0925, "step": 9642 }, { "epoch": 0.9909585944724134, "grad_norm": 0.04089382663369179, "learning_rate": 0.01, "loss": 2.0841, "step": 9645 }, { "epoch": 0.9912668242063084, "grad_norm": 0.05480481684207916, "learning_rate": 0.01, "loss": 2.0769, "step": 9648 }, { "epoch": 0.9915750539402034, "grad_norm": 0.04627472907304764, "learning_rate": 0.01, "loss": 2.094, "step": 9651 }, { "epoch": 0.9918832836740984, "grad_norm": 0.0517272874712944, "learning_rate": 0.01, "loss": 2.1181, "step": 9654 }, { "epoch": 0.9921915134079934, "grad_norm": 0.051012761890888214, "learning_rate": 0.01, "loss": 2.0985, "step": 9657 }, { "epoch": 0.9924997431418884, "grad_norm": 0.08666348457336426, "learning_rate": 0.01, "loss": 2.0875, "step": 9660 }, { "epoch": 0.9928079728757834, "grad_norm": 0.0972173810005188, "learning_rate": 0.01, "loss": 2.0995, "step": 9663 }, { "epoch": 0.9931162026096784, "grad_norm": 0.0765865370631218, "learning_rate": 0.01, "loss": 2.0729, "step": 9666 }, { "epoch": 0.9934244323435734, "grad_norm": 0.04532674700021744, "learning_rate": 0.01, "loss": 2.0656, "step": 9669 }, { "epoch": 0.9937326620774684, "grad_norm": 0.08642619848251343, "learning_rate": 0.01, "loss": 2.1036, "step": 9672 }, { "epoch": 0.9940408918113633, "grad_norm": 0.04758689925074577, "learning_rate": 0.01, "loss": 2.0683, "step": 9675 }, { "epoch": 0.9943491215452585, "grad_norm": 0.07701463252305984, "learning_rate": 0.01, "loss": 2.0898, "step": 9678 }, { "epoch": 0.9946573512791534, "grad_norm": 0.05999990925192833, "learning_rate": 0.01, "loss": 2.0694, "step": 9681 }, { "epoch": 0.9949655810130484, "grad_norm": 0.08793257176876068, "learning_rate": 0.01, "loss": 2.0689, "step": 9684 }, { "epoch": 0.9952738107469434, "grad_norm": 0.06139199063181877, "learning_rate": 0.01, "loss": 2.0801, "step": 9687 }, { "epoch": 0.9955820404808384, "grad_norm": 0.09202239662408829, "learning_rate": 0.01, "loss": 2.0837, "step": 9690 }, { "epoch": 0.9958902702147334, "grad_norm": 0.09284163266420364, "learning_rate": 0.01, "loss": 2.107, "step": 9693 }, { "epoch": 0.9961984999486284, "grad_norm": 0.08113729953765869, "learning_rate": 0.01, "loss": 2.076, "step": 9696 }, { "epoch": 0.9965067296825234, "grad_norm": 0.10663104802370071, "learning_rate": 0.01, "loss": 2.0973, "step": 9699 }, { "epoch": 0.9968149594164184, "grad_norm": 0.11791951954364777, "learning_rate": 0.01, "loss": 2.0885, "step": 9702 }, { "epoch": 0.9971231891503134, "grad_norm": 0.09039194136857986, "learning_rate": 0.01, "loss": 2.0957, "step": 9705 }, { "epoch": 0.9974314188842084, "grad_norm": 0.08142858743667603, "learning_rate": 0.01, "loss": 2.0721, "step": 9708 }, { "epoch": 0.9977396486181034, "grad_norm": 0.07347192615270615, "learning_rate": 0.01, "loss": 2.0985, "step": 9711 }, { "epoch": 0.9980478783519984, "grad_norm": 0.04449746012687683, "learning_rate": 0.01, "loss": 2.0728, "step": 9714 }, { "epoch": 0.9983561080858934, "grad_norm": 0.040178634226322174, "learning_rate": 0.01, "loss": 2.0773, "step": 9717 }, { "epoch": 0.9986643378197884, "grad_norm": 0.0577414333820343, "learning_rate": 0.01, "loss": 2.0854, "step": 9720 }, { "epoch": 0.9989725675536834, "grad_norm": 0.07444582879543304, "learning_rate": 0.01, "loss": 2.0834, "step": 9723 }, { "epoch": 0.9992807972875783, "grad_norm": 0.10387948155403137, "learning_rate": 0.01, "loss": 2.0698, "step": 9726 }, { "epoch": 0.9995890270214733, "grad_norm": 0.11066528409719467, "learning_rate": 0.01, "loss": 2.1035, "step": 9729 }, { "epoch": 0.9998972567553683, "grad_norm": 0.06454616039991379, "learning_rate": 0.01, "loss": 2.0692, "step": 9732 }, { "epoch": 0.9990763546798029, "grad_norm": 0.048325520008802414, "learning_rate": 0.01, "loss": 2.1225, "step": 9735 }, { "epoch": 0.999384236453202, "grad_norm": 0.03542228788137436, "learning_rate": 0.01, "loss": 2.1024, "step": 9738 }, { "epoch": 0.999692118226601, "grad_norm": 0.042020559310913086, "learning_rate": 0.01, "loss": 2.0968, "step": 9741 }, { "epoch": 1.0, "grad_norm": 0.04916913062334061, "learning_rate": 0.01, "loss": 2.1244, "step": 9744 }, { "epoch": 1.000307881773399, "grad_norm": 0.08905553072690964, "learning_rate": 0.01, "loss": 2.0867, "step": 9747 }, { "epoch": 1.000615763546798, "grad_norm": 0.07140953093767166, "learning_rate": 0.01, "loss": 2.0863, "step": 9750 }, { "epoch": 1.000923645320197, "grad_norm": 0.05284767597913742, "learning_rate": 0.01, "loss": 2.1131, "step": 9753 }, { "epoch": 1.001231527093596, "grad_norm": 0.1293289214372635, "learning_rate": 0.01, "loss": 2.1036, "step": 9756 }, { "epoch": 1.001539408866995, "grad_norm": 0.06052086502313614, "learning_rate": 0.01, "loss": 2.1189, "step": 9759 }, { "epoch": 1.0018472906403941, "grad_norm": 0.07361391931772232, "learning_rate": 0.01, "loss": 2.0962, "step": 9762 }, { "epoch": 1.0021551724137931, "grad_norm": 0.06513562798500061, "learning_rate": 0.01, "loss": 2.129, "step": 9765 }, { "epoch": 1.0024630541871922, "grad_norm": 0.036649156361818314, "learning_rate": 0.01, "loss": 2.0964, "step": 9768 }, { "epoch": 1.0027709359605912, "grad_norm": 0.05371764674782753, "learning_rate": 0.01, "loss": 2.0976, "step": 9771 }, { "epoch": 1.0030788177339902, "grad_norm": 0.06316730380058289, "learning_rate": 0.01, "loss": 2.097, "step": 9774 }, { "epoch": 1.0033866995073892, "grad_norm": 0.03097986802458763, "learning_rate": 0.01, "loss": 2.1128, "step": 9777 }, { "epoch": 1.0036945812807883, "grad_norm": 0.046021945774555206, "learning_rate": 0.01, "loss": 2.1296, "step": 9780 }, { "epoch": 1.0040024630541873, "grad_norm": 0.06580191850662231, "learning_rate": 0.01, "loss": 2.1106, "step": 9783 }, { "epoch": 1.0043103448275863, "grad_norm": 0.054073531180620193, "learning_rate": 0.01, "loss": 2.0986, "step": 9786 }, { "epoch": 1.0046182266009853, "grad_norm": 0.10088641196489334, "learning_rate": 0.01, "loss": 2.1301, "step": 9789 }, { "epoch": 1.0049261083743843, "grad_norm": 0.03944807127118111, "learning_rate": 0.01, "loss": 2.1337, "step": 9792 }, { "epoch": 1.0052339901477831, "grad_norm": 0.07183028757572174, "learning_rate": 0.01, "loss": 2.1272, "step": 9795 }, { "epoch": 1.0055418719211822, "grad_norm": 0.13821956515312195, "learning_rate": 0.01, "loss": 2.1016, "step": 9798 }, { "epoch": 1.0058497536945812, "grad_norm": 0.14031893014907837, "learning_rate": 0.01, "loss": 2.0924, "step": 9801 }, { "epoch": 1.0061576354679802, "grad_norm": 0.06494525820016861, "learning_rate": 0.01, "loss": 2.128, "step": 9804 }, { "epoch": 1.0064655172413792, "grad_norm": 0.05946667864918709, "learning_rate": 0.01, "loss": 2.1335, "step": 9807 }, { "epoch": 1.0067733990147782, "grad_norm": 0.05583272874355316, "learning_rate": 0.01, "loss": 2.1186, "step": 9810 }, { "epoch": 1.0070812807881773, "grad_norm": 0.06858284026384354, "learning_rate": 0.01, "loss": 2.1207, "step": 9813 }, { "epoch": 1.0073891625615763, "grad_norm": 0.05864641070365906, "learning_rate": 0.01, "loss": 2.0869, "step": 9816 }, { "epoch": 1.0076970443349753, "grad_norm": 0.043661102652549744, "learning_rate": 0.01, "loss": 2.1067, "step": 9819 }, { "epoch": 1.0080049261083743, "grad_norm": 0.07878375053405762, "learning_rate": 0.01, "loss": 2.1149, "step": 9822 }, { "epoch": 1.0083128078817734, "grad_norm": 0.04246210679411888, "learning_rate": 0.01, "loss": 2.1241, "step": 9825 }, { "epoch": 1.0086206896551724, "grad_norm": 0.06508597731590271, "learning_rate": 0.01, "loss": 2.1232, "step": 9828 }, { "epoch": 1.0089285714285714, "grad_norm": 0.07472758740186691, "learning_rate": 0.01, "loss": 2.0893, "step": 9831 }, { "epoch": 1.0092364532019704, "grad_norm": 0.13144147396087646, "learning_rate": 0.01, "loss": 2.1194, "step": 9834 }, { "epoch": 1.0095443349753694, "grad_norm": 0.08961367607116699, "learning_rate": 0.01, "loss": 2.1215, "step": 9837 }, { "epoch": 1.0098522167487685, "grad_norm": 0.053439076989889145, "learning_rate": 0.01, "loss": 2.1173, "step": 9840 }, { "epoch": 1.0101600985221675, "grad_norm": 0.03234443441033363, "learning_rate": 0.01, "loss": 2.104, "step": 9843 }, { "epoch": 1.0104679802955665, "grad_norm": 0.07516933977603912, "learning_rate": 0.01, "loss": 2.1186, "step": 9846 }, { "epoch": 1.0107758620689655, "grad_norm": 0.12221794575452805, "learning_rate": 0.01, "loss": 2.0934, "step": 9849 }, { "epoch": 1.0110837438423645, "grad_norm": 0.08198120445013046, "learning_rate": 0.01, "loss": 2.1495, "step": 9852 }, { "epoch": 1.0113916256157636, "grad_norm": 0.058380696922540665, "learning_rate": 0.01, "loss": 2.1234, "step": 9855 }, { "epoch": 1.0116995073891626, "grad_norm": 0.04831172525882721, "learning_rate": 0.01, "loss": 2.0977, "step": 9858 }, { "epoch": 1.0120073891625616, "grad_norm": 0.045920804142951965, "learning_rate": 0.01, "loss": 2.0842, "step": 9861 }, { "epoch": 1.0123152709359606, "grad_norm": 0.12969541549682617, "learning_rate": 0.01, "loss": 2.1005, "step": 9864 }, { "epoch": 1.0126231527093597, "grad_norm": 0.09659627079963684, "learning_rate": 0.01, "loss": 2.1126, "step": 9867 }, { "epoch": 1.0129310344827587, "grad_norm": 0.033160608261823654, "learning_rate": 0.01, "loss": 2.1244, "step": 9870 }, { "epoch": 1.0132389162561577, "grad_norm": 0.03523699939250946, "learning_rate": 0.01, "loss": 2.1009, "step": 9873 }, { "epoch": 1.0135467980295567, "grad_norm": 0.04670235142111778, "learning_rate": 0.01, "loss": 2.1107, "step": 9876 }, { "epoch": 1.0138546798029557, "grad_norm": 0.05278048664331436, "learning_rate": 0.01, "loss": 2.133, "step": 9879 }, { "epoch": 1.0141625615763548, "grad_norm": 0.1409105509519577, "learning_rate": 0.01, "loss": 2.105, "step": 9882 }, { "epoch": 1.0144704433497538, "grad_norm": 0.08208174258470535, "learning_rate": 0.01, "loss": 2.1202, "step": 9885 }, { "epoch": 1.0147783251231528, "grad_norm": 0.052980221807956696, "learning_rate": 0.01, "loss": 2.1108, "step": 9888 }, { "epoch": 1.0150862068965518, "grad_norm": 0.03402642160654068, "learning_rate": 0.01, "loss": 2.1058, "step": 9891 }, { "epoch": 1.0153940886699508, "grad_norm": 0.05165582895278931, "learning_rate": 0.01, "loss": 2.0962, "step": 9894 }, { "epoch": 1.0157019704433496, "grad_norm": 0.0488906130194664, "learning_rate": 0.01, "loss": 2.1157, "step": 9897 }, { "epoch": 1.0160098522167487, "grad_norm": 0.06578544527292252, "learning_rate": 0.01, "loss": 2.0783, "step": 9900 }, { "epoch": 1.0163177339901477, "grad_norm": 0.05930023267865181, "learning_rate": 0.01, "loss": 2.115, "step": 9903 }, { "epoch": 1.0166256157635467, "grad_norm": 0.07461842894554138, "learning_rate": 0.01, "loss": 2.0833, "step": 9906 }, { "epoch": 1.0169334975369457, "grad_norm": 0.04523751139640808, "learning_rate": 0.01, "loss": 2.1062, "step": 9909 }, { "epoch": 1.0172413793103448, "grad_norm": 0.05342249572277069, "learning_rate": 0.01, "loss": 2.1127, "step": 9912 }, { "epoch": 1.0175492610837438, "grad_norm": 0.040748368948698044, "learning_rate": 0.01, "loss": 2.0893, "step": 9915 }, { "epoch": 1.0178571428571428, "grad_norm": 0.03435824438929558, "learning_rate": 0.01, "loss": 2.1151, "step": 9918 }, { "epoch": 1.0181650246305418, "grad_norm": 0.04769265651702881, "learning_rate": 0.01, "loss": 2.0984, "step": 9921 }, { "epoch": 1.0184729064039408, "grad_norm": 0.07814217358827591, "learning_rate": 0.01, "loss": 2.1038, "step": 9924 }, { "epoch": 1.0187807881773399, "grad_norm": 0.12953363358974457, "learning_rate": 0.01, "loss": 2.1051, "step": 9927 }, { "epoch": 1.0190886699507389, "grad_norm": 0.11376773566007614, "learning_rate": 0.01, "loss": 2.0989, "step": 9930 }, { "epoch": 1.019396551724138, "grad_norm": 0.05323106423020363, "learning_rate": 0.01, "loss": 2.1135, "step": 9933 }, { "epoch": 1.019704433497537, "grad_norm": 0.07705114781856537, "learning_rate": 0.01, "loss": 2.1046, "step": 9936 }, { "epoch": 1.020012315270936, "grad_norm": 0.05934451147913933, "learning_rate": 0.01, "loss": 2.1207, "step": 9939 }, { "epoch": 1.020320197044335, "grad_norm": 0.10474961996078491, "learning_rate": 0.01, "loss": 2.1134, "step": 9942 }, { "epoch": 1.020628078817734, "grad_norm": 0.05283385515213013, "learning_rate": 0.01, "loss": 2.1085, "step": 9945 }, { "epoch": 1.020935960591133, "grad_norm": 0.043369196355342865, "learning_rate": 0.01, "loss": 2.1265, "step": 9948 }, { "epoch": 1.021243842364532, "grad_norm": 0.0366055853664875, "learning_rate": 0.01, "loss": 2.1214, "step": 9951 }, { "epoch": 1.021551724137931, "grad_norm": 0.06200672313570976, "learning_rate": 0.01, "loss": 2.0943, "step": 9954 }, { "epoch": 1.02185960591133, "grad_norm": 0.06652572005987167, "learning_rate": 0.01, "loss": 2.1139, "step": 9957 }, { "epoch": 1.022167487684729, "grad_norm": 0.04040740057826042, "learning_rate": 0.01, "loss": 2.0894, "step": 9960 }, { "epoch": 1.0224753694581281, "grad_norm": 0.049162358045578, "learning_rate": 0.01, "loss": 2.0955, "step": 9963 }, { "epoch": 1.0227832512315271, "grad_norm": 0.05465700104832649, "learning_rate": 0.01, "loss": 2.1109, "step": 9966 }, { "epoch": 1.0230911330049262, "grad_norm": 0.0575067512691021, "learning_rate": 0.01, "loss": 2.0956, "step": 9969 }, { "epoch": 1.0233990147783252, "grad_norm": 0.14622198045253754, "learning_rate": 0.01, "loss": 2.1031, "step": 9972 }, { "epoch": 1.0237068965517242, "grad_norm": 0.04765618219971657, "learning_rate": 0.01, "loss": 2.0834, "step": 9975 }, { "epoch": 1.0240147783251232, "grad_norm": 0.04039911553263664, "learning_rate": 0.01, "loss": 2.0933, "step": 9978 }, { "epoch": 1.0243226600985222, "grad_norm": 0.06009029969573021, "learning_rate": 0.01, "loss": 2.115, "step": 9981 }, { "epoch": 1.0246305418719213, "grad_norm": 0.06187298893928528, "learning_rate": 0.01, "loss": 2.079, "step": 9984 }, { "epoch": 1.0249384236453203, "grad_norm": 0.05368026718497276, "learning_rate": 0.01, "loss": 2.0875, "step": 9987 }, { "epoch": 1.0252463054187193, "grad_norm": 0.051921263337135315, "learning_rate": 0.01, "loss": 2.1243, "step": 9990 }, { "epoch": 1.0255541871921183, "grad_norm": 0.09820009768009186, "learning_rate": 0.01, "loss": 2.0983, "step": 9993 }, { "epoch": 1.0258620689655173, "grad_norm": 0.10601375997066498, "learning_rate": 0.01, "loss": 2.1288, "step": 9996 }, { "epoch": 1.0261699507389161, "grad_norm": 0.05488260090351105, "learning_rate": 0.01, "loss": 2.1033, "step": 9999 }, { "epoch": 1.0264778325123152, "grad_norm": 0.07482553273439407, "learning_rate": 0.01, "loss": 2.1181, "step": 10002 }, { "epoch": 1.0267857142857142, "grad_norm": 0.044733475893735886, "learning_rate": 0.01, "loss": 2.1237, "step": 10005 }, { "epoch": 1.0270935960591132, "grad_norm": 0.04775967076420784, "learning_rate": 0.01, "loss": 2.1288, "step": 10008 }, { "epoch": 1.0274014778325122, "grad_norm": 0.05972621962428093, "learning_rate": 0.01, "loss": 2.0878, "step": 10011 }, { "epoch": 1.0277093596059113, "grad_norm": 0.12219330668449402, "learning_rate": 0.01, "loss": 2.1034, "step": 10014 }, { "epoch": 1.0280172413793103, "grad_norm": 0.05171920731663704, "learning_rate": 0.01, "loss": 2.0925, "step": 10017 }, { "epoch": 1.0283251231527093, "grad_norm": 0.04166760668158531, "learning_rate": 0.01, "loss": 2.0928, "step": 10020 }, { "epoch": 1.0286330049261083, "grad_norm": 0.05231022089719772, "learning_rate": 0.01, "loss": 2.0945, "step": 10023 }, { "epoch": 1.0289408866995073, "grad_norm": 0.1091604232788086, "learning_rate": 0.01, "loss": 2.0878, "step": 10026 }, { "epoch": 1.0292487684729064, "grad_norm": 0.07104350626468658, "learning_rate": 0.01, "loss": 2.1125, "step": 10029 }, { "epoch": 1.0295566502463054, "grad_norm": 0.0466371588408947, "learning_rate": 0.01, "loss": 2.0973, "step": 10032 }, { "epoch": 1.0298645320197044, "grad_norm": 0.05548730120062828, "learning_rate": 0.01, "loss": 2.0846, "step": 10035 }, { "epoch": 1.0301724137931034, "grad_norm": 0.06483764201402664, "learning_rate": 0.01, "loss": 2.105, "step": 10038 }, { "epoch": 1.0304802955665024, "grad_norm": 0.05243910476565361, "learning_rate": 0.01, "loss": 2.1011, "step": 10041 }, { "epoch": 1.0307881773399015, "grad_norm": 0.09996815025806427, "learning_rate": 0.01, "loss": 2.1389, "step": 10044 }, { "epoch": 1.0310960591133005, "grad_norm": 0.04864559695124626, "learning_rate": 0.01, "loss": 2.1069, "step": 10047 }, { "epoch": 1.0314039408866995, "grad_norm": 0.14447607100009918, "learning_rate": 0.01, "loss": 2.1075, "step": 10050 }, { "epoch": 1.0317118226600985, "grad_norm": 0.050261352211236954, "learning_rate": 0.01, "loss": 2.1147, "step": 10053 }, { "epoch": 1.0320197044334976, "grad_norm": 0.07719244807958603, "learning_rate": 0.01, "loss": 2.1061, "step": 10056 }, { "epoch": 1.0323275862068966, "grad_norm": 0.10620381683111191, "learning_rate": 0.01, "loss": 2.1129, "step": 10059 }, { "epoch": 1.0326354679802956, "grad_norm": 0.05358508229255676, "learning_rate": 0.01, "loss": 2.1156, "step": 10062 }, { "epoch": 1.0329433497536946, "grad_norm": 0.04341145232319832, "learning_rate": 0.01, "loss": 2.1046, "step": 10065 }, { "epoch": 1.0332512315270936, "grad_norm": 0.04785105213522911, "learning_rate": 0.01, "loss": 2.0804, "step": 10068 }, { "epoch": 1.0335591133004927, "grad_norm": 0.04886849224567413, "learning_rate": 0.01, "loss": 2.0691, "step": 10071 }, { "epoch": 1.0338669950738917, "grad_norm": 0.03917735815048218, "learning_rate": 0.01, "loss": 2.0808, "step": 10074 }, { "epoch": 1.0341748768472907, "grad_norm": 0.10696244239807129, "learning_rate": 0.01, "loss": 2.085, "step": 10077 }, { "epoch": 1.0344827586206897, "grad_norm": 0.14525163173675537, "learning_rate": 0.01, "loss": 2.1246, "step": 10080 }, { "epoch": 1.0347906403940887, "grad_norm": 0.06464140862226486, "learning_rate": 0.01, "loss": 2.1088, "step": 10083 }, { "epoch": 1.0350985221674878, "grad_norm": 0.055628299713134766, "learning_rate": 0.01, "loss": 2.1013, "step": 10086 }, { "epoch": 1.0354064039408868, "grad_norm": 0.0457589291036129, "learning_rate": 0.01, "loss": 2.118, "step": 10089 }, { "epoch": 1.0357142857142858, "grad_norm": 0.07108809798955917, "learning_rate": 0.01, "loss": 2.0882, "step": 10092 }, { "epoch": 1.0360221674876848, "grad_norm": 0.07304032146930695, "learning_rate": 0.01, "loss": 2.1632, "step": 10095 }, { "epoch": 1.0363300492610836, "grad_norm": 0.04778844490647316, "learning_rate": 0.01, "loss": 2.1076, "step": 10098 }, { "epoch": 1.0366379310344827, "grad_norm": 0.0444946251809597, "learning_rate": 0.01, "loss": 2.1092, "step": 10101 }, { "epoch": 1.0369458128078817, "grad_norm": 0.03863450884819031, "learning_rate": 0.01, "loss": 2.0973, "step": 10104 }, { "epoch": 1.0372536945812807, "grad_norm": 0.11049003899097443, "learning_rate": 0.01, "loss": 2.1069, "step": 10107 }, { "epoch": 1.0375615763546797, "grad_norm": 0.055413637310266495, "learning_rate": 0.01, "loss": 2.0935, "step": 10110 }, { "epoch": 1.0378694581280787, "grad_norm": 0.1212301105260849, "learning_rate": 0.01, "loss": 2.1033, "step": 10113 }, { "epoch": 1.0381773399014778, "grad_norm": 0.06444283574819565, "learning_rate": 0.01, "loss": 2.0821, "step": 10116 }, { "epoch": 1.0384852216748768, "grad_norm": 0.048522353172302246, "learning_rate": 0.01, "loss": 2.1129, "step": 10119 }, { "epoch": 1.0387931034482758, "grad_norm": 0.03755674138665199, "learning_rate": 0.01, "loss": 2.0773, "step": 10122 }, { "epoch": 1.0391009852216748, "grad_norm": 0.03873259574174881, "learning_rate": 0.01, "loss": 2.0877, "step": 10125 }, { "epoch": 1.0394088669950738, "grad_norm": 0.062387898564338684, "learning_rate": 0.01, "loss": 2.1119, "step": 10128 }, { "epoch": 1.0397167487684729, "grad_norm": 0.037559203803539276, "learning_rate": 0.01, "loss": 2.1165, "step": 10131 }, { "epoch": 1.0400246305418719, "grad_norm": 0.0703917145729065, "learning_rate": 0.01, "loss": 2.0877, "step": 10134 }, { "epoch": 1.040332512315271, "grad_norm": 0.05063795670866966, "learning_rate": 0.01, "loss": 2.1282, "step": 10137 }, { "epoch": 1.04064039408867, "grad_norm": 0.08476493507623672, "learning_rate": 0.01, "loss": 2.1217, "step": 10140 }, { "epoch": 1.040948275862069, "grad_norm": 0.09482383728027344, "learning_rate": 0.01, "loss": 2.1002, "step": 10143 }, { "epoch": 1.041256157635468, "grad_norm": 0.1094396710395813, "learning_rate": 0.01, "loss": 2.1138, "step": 10146 }, { "epoch": 1.041564039408867, "grad_norm": 0.17252720892429352, "learning_rate": 0.01, "loss": 2.1079, "step": 10149 }, { "epoch": 1.041871921182266, "grad_norm": 0.11076754331588745, "learning_rate": 0.01, "loss": 2.1198, "step": 10152 }, { "epoch": 1.042179802955665, "grad_norm": 0.06879215687513351, "learning_rate": 0.01, "loss": 2.0878, "step": 10155 }, { "epoch": 1.042487684729064, "grad_norm": 0.07402212172746658, "learning_rate": 0.01, "loss": 2.0869, "step": 10158 }, { "epoch": 1.042795566502463, "grad_norm": 0.04562051594257355, "learning_rate": 0.01, "loss": 2.1139, "step": 10161 }, { "epoch": 1.043103448275862, "grad_norm": 0.04578396677970886, "learning_rate": 0.01, "loss": 2.0974, "step": 10164 }, { "epoch": 1.0434113300492611, "grad_norm": 0.051678020507097244, "learning_rate": 0.01, "loss": 2.0995, "step": 10167 }, { "epoch": 1.0437192118226601, "grad_norm": 0.03445015102624893, "learning_rate": 0.01, "loss": 2.106, "step": 10170 }, { "epoch": 1.0440270935960592, "grad_norm": 0.03868851810693741, "learning_rate": 0.01, "loss": 2.0732, "step": 10173 }, { "epoch": 1.0443349753694582, "grad_norm": 0.058904558420181274, "learning_rate": 0.01, "loss": 2.085, "step": 10176 }, { "epoch": 1.0446428571428572, "grad_norm": 0.10729484260082245, "learning_rate": 0.01, "loss": 2.0909, "step": 10179 }, { "epoch": 1.0449507389162562, "grad_norm": 0.10037554055452347, "learning_rate": 0.01, "loss": 2.0945, "step": 10182 }, { "epoch": 1.0452586206896552, "grad_norm": 0.07336730509996414, "learning_rate": 0.01, "loss": 2.0885, "step": 10185 }, { "epoch": 1.0455665024630543, "grad_norm": 0.11717227101325989, "learning_rate": 0.01, "loss": 2.1019, "step": 10188 }, { "epoch": 1.0458743842364533, "grad_norm": 0.06263696402311325, "learning_rate": 0.01, "loss": 2.1113, "step": 10191 }, { "epoch": 1.0461822660098523, "grad_norm": 0.07939436286687851, "learning_rate": 0.01, "loss": 2.0803, "step": 10194 }, { "epoch": 1.0464901477832513, "grad_norm": 0.05761004984378815, "learning_rate": 0.01, "loss": 2.1155, "step": 10197 }, { "epoch": 1.0467980295566504, "grad_norm": 0.04293765127658844, "learning_rate": 0.01, "loss": 2.0944, "step": 10200 }, { "epoch": 1.0471059113300492, "grad_norm": 0.04638001322746277, "learning_rate": 0.01, "loss": 2.113, "step": 10203 }, { "epoch": 1.0474137931034482, "grad_norm": 0.047882046550512314, "learning_rate": 0.01, "loss": 2.0733, "step": 10206 }, { "epoch": 1.0477216748768472, "grad_norm": 0.07461071759462357, "learning_rate": 0.01, "loss": 2.107, "step": 10209 }, { "epoch": 1.0480295566502462, "grad_norm": 0.10987289249897003, "learning_rate": 0.01, "loss": 2.105, "step": 10212 }, { "epoch": 1.0483374384236452, "grad_norm": 0.04183235019445419, "learning_rate": 0.01, "loss": 2.0953, "step": 10215 }, { "epoch": 1.0486453201970443, "grad_norm": 0.049700990319252014, "learning_rate": 0.01, "loss": 2.1067, "step": 10218 }, { "epoch": 1.0489532019704433, "grad_norm": 0.08448828011751175, "learning_rate": 0.01, "loss": 2.1113, "step": 10221 }, { "epoch": 1.0492610837438423, "grad_norm": 0.05486508831381798, "learning_rate": 0.01, "loss": 2.1156, "step": 10224 }, { "epoch": 1.0495689655172413, "grad_norm": 0.057925377041101456, "learning_rate": 0.01, "loss": 2.127, "step": 10227 }, { "epoch": 1.0498768472906403, "grad_norm": 0.05322302505373955, "learning_rate": 0.01, "loss": 2.0861, "step": 10230 }, { "epoch": 1.0501847290640394, "grad_norm": 0.046823181211948395, "learning_rate": 0.01, "loss": 2.089, "step": 10233 }, { "epoch": 1.0504926108374384, "grad_norm": 0.05037027224898338, "learning_rate": 0.01, "loss": 2.0841, "step": 10236 }, { "epoch": 1.0508004926108374, "grad_norm": 0.05172303318977356, "learning_rate": 0.01, "loss": 2.105, "step": 10239 }, { "epoch": 1.0511083743842364, "grad_norm": 0.07993052154779434, "learning_rate": 0.01, "loss": 2.1097, "step": 10242 }, { "epoch": 1.0514162561576355, "grad_norm": 0.039322953671216965, "learning_rate": 0.01, "loss": 2.0951, "step": 10245 }, { "epoch": 1.0517241379310345, "grad_norm": 0.05829343572258949, "learning_rate": 0.01, "loss": 2.1257, "step": 10248 }, { "epoch": 1.0520320197044335, "grad_norm": 0.12303601950407028, "learning_rate": 0.01, "loss": 2.1143, "step": 10251 }, { "epoch": 1.0523399014778325, "grad_norm": 0.07176418602466583, "learning_rate": 0.01, "loss": 2.1297, "step": 10254 }, { "epoch": 1.0526477832512315, "grad_norm": 0.05229344964027405, "learning_rate": 0.01, "loss": 2.0934, "step": 10257 }, { "epoch": 1.0529556650246306, "grad_norm": 0.041665658354759216, "learning_rate": 0.01, "loss": 2.116, "step": 10260 }, { "epoch": 1.0532635467980296, "grad_norm": 0.04542261362075806, "learning_rate": 0.01, "loss": 2.1277, "step": 10263 }, { "epoch": 1.0535714285714286, "grad_norm": 0.0501495897769928, "learning_rate": 0.01, "loss": 2.0911, "step": 10266 }, { "epoch": 1.0538793103448276, "grad_norm": 0.06474924832582474, "learning_rate": 0.01, "loss": 2.1254, "step": 10269 }, { "epoch": 1.0541871921182266, "grad_norm": 0.0736108273267746, "learning_rate": 0.01, "loss": 2.0685, "step": 10272 }, { "epoch": 1.0544950738916257, "grad_norm": 0.07487022131681442, "learning_rate": 0.01, "loss": 2.113, "step": 10275 }, { "epoch": 1.0548029556650247, "grad_norm": 0.04876410961151123, "learning_rate": 0.01, "loss": 2.1051, "step": 10278 }, { "epoch": 1.0551108374384237, "grad_norm": 0.056595779955387115, "learning_rate": 0.01, "loss": 2.0864, "step": 10281 }, { "epoch": 1.0554187192118227, "grad_norm": 0.06958241015672684, "learning_rate": 0.01, "loss": 2.1, "step": 10284 }, { "epoch": 1.0557266009852218, "grad_norm": 0.08811846375465393, "learning_rate": 0.01, "loss": 2.1021, "step": 10287 }, { "epoch": 1.0560344827586208, "grad_norm": 0.061557747423648834, "learning_rate": 0.01, "loss": 2.1063, "step": 10290 }, { "epoch": 1.0563423645320198, "grad_norm": 0.07043389976024628, "learning_rate": 0.01, "loss": 2.106, "step": 10293 }, { "epoch": 1.0566502463054188, "grad_norm": 0.0916379988193512, "learning_rate": 0.01, "loss": 2.0851, "step": 10296 }, { "epoch": 1.0569581280788178, "grad_norm": 0.050577979534864426, "learning_rate": 0.01, "loss": 2.0966, "step": 10299 }, { "epoch": 1.0572660098522166, "grad_norm": 0.06576110422611237, "learning_rate": 0.01, "loss": 2.1038, "step": 10302 }, { "epoch": 1.0575738916256157, "grad_norm": 0.09315023571252823, "learning_rate": 0.01, "loss": 2.1341, "step": 10305 }, { "epoch": 1.0578817733990147, "grad_norm": 0.0649820864200592, "learning_rate": 0.01, "loss": 2.1064, "step": 10308 }, { "epoch": 1.0581896551724137, "grad_norm": 0.07930494844913483, "learning_rate": 0.01, "loss": 2.107, "step": 10311 }, { "epoch": 1.0584975369458127, "grad_norm": 0.09142257273197174, "learning_rate": 0.01, "loss": 2.1162, "step": 10314 }, { "epoch": 1.0588054187192117, "grad_norm": 0.05011974647641182, "learning_rate": 0.01, "loss": 2.0686, "step": 10317 }, { "epoch": 1.0591133004926108, "grad_norm": 0.1002635508775711, "learning_rate": 0.01, "loss": 2.138, "step": 10320 }, { "epoch": 1.0594211822660098, "grad_norm": 0.07570278644561768, "learning_rate": 0.01, "loss": 2.0693, "step": 10323 }, { "epoch": 1.0597290640394088, "grad_norm": 0.05086719989776611, "learning_rate": 0.01, "loss": 2.0991, "step": 10326 }, { "epoch": 1.0600369458128078, "grad_norm": 0.03596855327486992, "learning_rate": 0.01, "loss": 2.1038, "step": 10329 }, { "epoch": 1.0603448275862069, "grad_norm": 0.05059434473514557, "learning_rate": 0.01, "loss": 2.1, "step": 10332 }, { "epoch": 1.0606527093596059, "grad_norm": 0.058818116784095764, "learning_rate": 0.01, "loss": 2.0855, "step": 10335 }, { "epoch": 1.060960591133005, "grad_norm": 0.14139403402805328, "learning_rate": 0.01, "loss": 2.0755, "step": 10338 }, { "epoch": 1.061268472906404, "grad_norm": 0.12123113870620728, "learning_rate": 0.01, "loss": 2.0896, "step": 10341 }, { "epoch": 1.061576354679803, "grad_norm": 0.04767270013689995, "learning_rate": 0.01, "loss": 2.11, "step": 10344 }, { "epoch": 1.061884236453202, "grad_norm": 0.03506815433502197, "learning_rate": 0.01, "loss": 2.0953, "step": 10347 }, { "epoch": 1.062192118226601, "grad_norm": 0.08807789534330368, "learning_rate": 0.01, "loss": 2.0903, "step": 10350 }, { "epoch": 1.0625, "grad_norm": 0.1130862608551979, "learning_rate": 0.01, "loss": 2.0888, "step": 10353 }, { "epoch": 1.062807881773399, "grad_norm": 0.05720696598291397, "learning_rate": 0.01, "loss": 2.0904, "step": 10356 }, { "epoch": 1.063115763546798, "grad_norm": 0.057933416217565536, "learning_rate": 0.01, "loss": 2.1138, "step": 10359 }, { "epoch": 1.063423645320197, "grad_norm": 0.056713253259658813, "learning_rate": 0.01, "loss": 2.0965, "step": 10362 }, { "epoch": 1.063731527093596, "grad_norm": 0.05062280595302582, "learning_rate": 0.01, "loss": 2.1058, "step": 10365 }, { "epoch": 1.064039408866995, "grad_norm": 0.03439073637127876, "learning_rate": 0.01, "loss": 2.0945, "step": 10368 }, { "epoch": 1.0643472906403941, "grad_norm": 0.10244173556566238, "learning_rate": 0.01, "loss": 2.0916, "step": 10371 }, { "epoch": 1.0646551724137931, "grad_norm": 0.04706069454550743, "learning_rate": 0.01, "loss": 2.103, "step": 10374 }, { "epoch": 1.0649630541871922, "grad_norm": 0.11580058932304382, "learning_rate": 0.01, "loss": 2.0995, "step": 10377 }, { "epoch": 1.0652709359605912, "grad_norm": 0.044736508280038834, "learning_rate": 0.01, "loss": 2.0906, "step": 10380 }, { "epoch": 1.0655788177339902, "grad_norm": 0.08990567922592163, "learning_rate": 0.01, "loss": 2.1197, "step": 10383 }, { "epoch": 1.0658866995073892, "grad_norm": 0.06923419237136841, "learning_rate": 0.01, "loss": 2.0997, "step": 10386 }, { "epoch": 1.0661945812807883, "grad_norm": 0.059495240449905396, "learning_rate": 0.01, "loss": 2.1106, "step": 10389 }, { "epoch": 1.0665024630541873, "grad_norm": 0.07906550914049149, "learning_rate": 0.01, "loss": 2.1196, "step": 10392 }, { "epoch": 1.0668103448275863, "grad_norm": 0.08792297542095184, "learning_rate": 0.01, "loss": 2.0985, "step": 10395 }, { "epoch": 1.0671182266009853, "grad_norm": 0.06077072396874428, "learning_rate": 0.01, "loss": 2.088, "step": 10398 }, { "epoch": 1.0674261083743843, "grad_norm": 0.03865751996636391, "learning_rate": 0.01, "loss": 2.0894, "step": 10401 }, { "epoch": 1.0677339901477834, "grad_norm": 0.03158612549304962, "learning_rate": 0.01, "loss": 2.0861, "step": 10404 }, { "epoch": 1.0680418719211822, "grad_norm": 0.03455328568816185, "learning_rate": 0.01, "loss": 2.0819, "step": 10407 }, { "epoch": 1.0683497536945812, "grad_norm": 0.062100328505039215, "learning_rate": 0.01, "loss": 2.0967, "step": 10410 }, { "epoch": 1.0686576354679802, "grad_norm": 0.10934283584356308, "learning_rate": 0.01, "loss": 2.1135, "step": 10413 }, { "epoch": 1.0689655172413792, "grad_norm": 0.07184179127216339, "learning_rate": 0.01, "loss": 2.0949, "step": 10416 }, { "epoch": 1.0692733990147782, "grad_norm": 0.06610151380300522, "learning_rate": 0.01, "loss": 2.1001, "step": 10419 }, { "epoch": 1.0695812807881773, "grad_norm": 0.06064629554748535, "learning_rate": 0.01, "loss": 2.0835, "step": 10422 }, { "epoch": 1.0698891625615763, "grad_norm": 0.0531432181596756, "learning_rate": 0.01, "loss": 2.1105, "step": 10425 }, { "epoch": 1.0701970443349753, "grad_norm": 0.056448470801115036, "learning_rate": 0.01, "loss": 2.0724, "step": 10428 }, { "epoch": 1.0705049261083743, "grad_norm": 0.03736816346645355, "learning_rate": 0.01, "loss": 2.1243, "step": 10431 }, { "epoch": 1.0708128078817734, "grad_norm": 0.12693117558956146, "learning_rate": 0.01, "loss": 2.1156, "step": 10434 }, { "epoch": 1.0711206896551724, "grad_norm": 0.0428193174302578, "learning_rate": 0.01, "loss": 2.1025, "step": 10437 }, { "epoch": 1.0714285714285714, "grad_norm": 0.0464596189558506, "learning_rate": 0.01, "loss": 2.1067, "step": 10440 }, { "epoch": 1.0717364532019704, "grad_norm": 0.07535267621278763, "learning_rate": 0.01, "loss": 2.0785, "step": 10443 }, { "epoch": 1.0720443349753694, "grad_norm": 0.0537327378988266, "learning_rate": 0.01, "loss": 2.0775, "step": 10446 }, { "epoch": 1.0723522167487685, "grad_norm": 0.03783145919442177, "learning_rate": 0.01, "loss": 2.0921, "step": 10449 }, { "epoch": 1.0726600985221675, "grad_norm": 0.052689142525196075, "learning_rate": 0.01, "loss": 2.116, "step": 10452 }, { "epoch": 1.0729679802955665, "grad_norm": 0.1437288373708725, "learning_rate": 0.01, "loss": 2.071, "step": 10455 }, { "epoch": 1.0732758620689655, "grad_norm": 0.07633062452077866, "learning_rate": 0.01, "loss": 2.088, "step": 10458 }, { "epoch": 1.0735837438423645, "grad_norm": 0.061189718544483185, "learning_rate": 0.01, "loss": 2.0796, "step": 10461 }, { "epoch": 1.0738916256157636, "grad_norm": 0.06256800144910812, "learning_rate": 0.01, "loss": 2.1056, "step": 10464 }, { "epoch": 1.0741995073891626, "grad_norm": 0.0745188519358635, "learning_rate": 0.01, "loss": 2.0782, "step": 10467 }, { "epoch": 1.0745073891625616, "grad_norm": 0.0663486197590828, "learning_rate": 0.01, "loss": 2.0704, "step": 10470 }, { "epoch": 1.0748152709359606, "grad_norm": 0.05472427234053612, "learning_rate": 0.01, "loss": 2.082, "step": 10473 }, { "epoch": 1.0751231527093597, "grad_norm": 0.10171230137348175, "learning_rate": 0.01, "loss": 2.1135, "step": 10476 }, { "epoch": 1.0754310344827587, "grad_norm": 0.05689026787877083, "learning_rate": 0.01, "loss": 2.0748, "step": 10479 }, { "epoch": 1.0757389162561577, "grad_norm": 0.0593440905213356, "learning_rate": 0.01, "loss": 2.0922, "step": 10482 }, { "epoch": 1.0760467980295567, "grad_norm": 0.07408995181322098, "learning_rate": 0.01, "loss": 2.0781, "step": 10485 }, { "epoch": 1.0763546798029557, "grad_norm": 0.05688070133328438, "learning_rate": 0.01, "loss": 2.1085, "step": 10488 }, { "epoch": 1.0766625615763548, "grad_norm": 0.05378828942775726, "learning_rate": 0.01, "loss": 2.1084, "step": 10491 }, { "epoch": 1.0769704433497538, "grad_norm": 0.057735592126846313, "learning_rate": 0.01, "loss": 2.1023, "step": 10494 }, { "epoch": 1.0772783251231528, "grad_norm": 0.0586666576564312, "learning_rate": 0.01, "loss": 2.1003, "step": 10497 }, { "epoch": 1.0775862068965518, "grad_norm": 0.12087473273277283, "learning_rate": 0.01, "loss": 2.0974, "step": 10500 }, { "epoch": 1.0778940886699506, "grad_norm": 0.07307861000299454, "learning_rate": 0.01, "loss": 2.0913, "step": 10503 }, { "epoch": 1.0782019704433496, "grad_norm": 0.06621012091636658, "learning_rate": 0.01, "loss": 2.1173, "step": 10506 }, { "epoch": 1.0785098522167487, "grad_norm": 0.0647876039147377, "learning_rate": 0.01, "loss": 2.1006, "step": 10509 }, { "epoch": 1.0788177339901477, "grad_norm": 0.06163914501667023, "learning_rate": 0.01, "loss": 2.0892, "step": 10512 }, { "epoch": 1.0791256157635467, "grad_norm": 0.04312353581190109, "learning_rate": 0.01, "loss": 2.0901, "step": 10515 }, { "epoch": 1.0794334975369457, "grad_norm": 0.0760812908411026, "learning_rate": 0.01, "loss": 2.0995, "step": 10518 }, { "epoch": 1.0797413793103448, "grad_norm": 0.0802140161395073, "learning_rate": 0.01, "loss": 2.0905, "step": 10521 }, { "epoch": 1.0800492610837438, "grad_norm": 0.09008529782295227, "learning_rate": 0.01, "loss": 2.08, "step": 10524 }, { "epoch": 1.0803571428571428, "grad_norm": 0.07469696551561356, "learning_rate": 0.01, "loss": 2.0725, "step": 10527 }, { "epoch": 1.0806650246305418, "grad_norm": 0.08821582794189453, "learning_rate": 0.01, "loss": 2.1086, "step": 10530 }, { "epoch": 1.0809729064039408, "grad_norm": 0.04690997302532196, "learning_rate": 0.01, "loss": 2.1095, "step": 10533 }, { "epoch": 1.0812807881773399, "grad_norm": 0.04316158965229988, "learning_rate": 0.01, "loss": 2.0818, "step": 10536 }, { "epoch": 1.0815886699507389, "grad_norm": 0.06996279209852219, "learning_rate": 0.01, "loss": 2.0993, "step": 10539 }, { "epoch": 1.081896551724138, "grad_norm": 0.10073279589414597, "learning_rate": 0.01, "loss": 2.112, "step": 10542 }, { "epoch": 1.082204433497537, "grad_norm": 0.0448322668671608, "learning_rate": 0.01, "loss": 2.0834, "step": 10545 }, { "epoch": 1.082512315270936, "grad_norm": 0.11411638557910919, "learning_rate": 0.01, "loss": 2.1082, "step": 10548 }, { "epoch": 1.082820197044335, "grad_norm": 0.10779088735580444, "learning_rate": 0.01, "loss": 2.0702, "step": 10551 }, { "epoch": 1.083128078817734, "grad_norm": 0.041448626667261124, "learning_rate": 0.01, "loss": 2.1041, "step": 10554 }, { "epoch": 1.083435960591133, "grad_norm": 0.07522560656070709, "learning_rate": 0.01, "loss": 2.0794, "step": 10557 }, { "epoch": 1.083743842364532, "grad_norm": 0.048221901059150696, "learning_rate": 0.01, "loss": 2.0936, "step": 10560 }, { "epoch": 1.084051724137931, "grad_norm": 0.05512038618326187, "learning_rate": 0.01, "loss": 2.0898, "step": 10563 }, { "epoch": 1.08435960591133, "grad_norm": 0.07599300891160965, "learning_rate": 0.01, "loss": 2.1246, "step": 10566 }, { "epoch": 1.084667487684729, "grad_norm": 0.06631644070148468, "learning_rate": 0.01, "loss": 2.0861, "step": 10569 }, { "epoch": 1.0849753694581281, "grad_norm": 0.04972488060593605, "learning_rate": 0.01, "loss": 2.11, "step": 10572 }, { "epoch": 1.0852832512315271, "grad_norm": 0.08250217139720917, "learning_rate": 0.01, "loss": 2.1142, "step": 10575 }, { "epoch": 1.0855911330049262, "grad_norm": 0.09104974567890167, "learning_rate": 0.01, "loss": 2.0822, "step": 10578 }, { "epoch": 1.0858990147783252, "grad_norm": 0.057310063391923904, "learning_rate": 0.01, "loss": 2.0819, "step": 10581 }, { "epoch": 1.0862068965517242, "grad_norm": 0.08102291077375412, "learning_rate": 0.01, "loss": 2.0931, "step": 10584 }, { "epoch": 1.0865147783251232, "grad_norm": 0.045641325414180756, "learning_rate": 0.01, "loss": 2.1096, "step": 10587 }, { "epoch": 1.0868226600985222, "grad_norm": 0.05350523442029953, "learning_rate": 0.01, "loss": 2.1151, "step": 10590 }, { "epoch": 1.0871305418719213, "grad_norm": 0.045734379440546036, "learning_rate": 0.01, "loss": 2.1043, "step": 10593 }, { "epoch": 1.0874384236453203, "grad_norm": 0.044645924121141434, "learning_rate": 0.01, "loss": 2.0882, "step": 10596 }, { "epoch": 1.0877463054187193, "grad_norm": 0.046704743057489395, "learning_rate": 0.01, "loss": 2.0823, "step": 10599 }, { "epoch": 1.0880541871921183, "grad_norm": 0.09600807726383209, "learning_rate": 0.01, "loss": 2.09, "step": 10602 }, { "epoch": 1.0883620689655173, "grad_norm": 0.062323443591594696, "learning_rate": 0.01, "loss": 2.091, "step": 10605 }, { "epoch": 1.0886699507389164, "grad_norm": 0.08459887653589249, "learning_rate": 0.01, "loss": 2.094, "step": 10608 }, { "epoch": 1.0889778325123152, "grad_norm": 0.0621943362057209, "learning_rate": 0.01, "loss": 2.0735, "step": 10611 }, { "epoch": 1.0892857142857142, "grad_norm": 0.10963741689920425, "learning_rate": 0.01, "loss": 2.0769, "step": 10614 }, { "epoch": 1.0895935960591132, "grad_norm": 0.07325689494609833, "learning_rate": 0.01, "loss": 2.0905, "step": 10617 }, { "epoch": 1.0899014778325122, "grad_norm": 0.08307964354753494, "learning_rate": 0.01, "loss": 2.0977, "step": 10620 }, { "epoch": 1.0902093596059113, "grad_norm": 0.18072094023227692, "learning_rate": 0.01, "loss": 2.1096, "step": 10623 }, { "epoch": 1.0905172413793103, "grad_norm": 0.10427471250295639, "learning_rate": 0.01, "loss": 2.0761, "step": 10626 }, { "epoch": 1.0908251231527093, "grad_norm": 0.0732191875576973, "learning_rate": 0.01, "loss": 2.1031, "step": 10629 }, { "epoch": 1.0911330049261083, "grad_norm": 0.03703717514872551, "learning_rate": 0.01, "loss": 2.0734, "step": 10632 }, { "epoch": 1.0914408866995073, "grad_norm": 0.04907006770372391, "learning_rate": 0.01, "loss": 2.096, "step": 10635 }, { "epoch": 1.0917487684729064, "grad_norm": 0.04126304015517235, "learning_rate": 0.01, "loss": 2.0824, "step": 10638 }, { "epoch": 1.0920566502463054, "grad_norm": 0.04017401486635208, "learning_rate": 0.01, "loss": 2.0694, "step": 10641 }, { "epoch": 1.0923645320197044, "grad_norm": 0.036132264882326126, "learning_rate": 0.01, "loss": 2.0792, "step": 10644 }, { "epoch": 1.0926724137931034, "grad_norm": 0.06275150179862976, "learning_rate": 0.01, "loss": 2.1172, "step": 10647 }, { "epoch": 1.0929802955665024, "grad_norm": 0.08319203555583954, "learning_rate": 0.01, "loss": 2.0868, "step": 10650 }, { "epoch": 1.0932881773399015, "grad_norm": 0.08663000166416168, "learning_rate": 0.01, "loss": 2.0834, "step": 10653 }, { "epoch": 1.0935960591133005, "grad_norm": 0.10765951871871948, "learning_rate": 0.01, "loss": 2.0891, "step": 10656 }, { "epoch": 1.0939039408866995, "grad_norm": 0.035412587225437164, "learning_rate": 0.01, "loss": 2.0912, "step": 10659 }, { "epoch": 1.0942118226600985, "grad_norm": 0.051735054701566696, "learning_rate": 0.01, "loss": 2.0986, "step": 10662 }, { "epoch": 1.0945197044334976, "grad_norm": 0.04320614039897919, "learning_rate": 0.01, "loss": 2.0912, "step": 10665 }, { "epoch": 1.0948275862068966, "grad_norm": 0.03285462409257889, "learning_rate": 0.01, "loss": 2.0957, "step": 10668 }, { "epoch": 1.0951354679802956, "grad_norm": 0.05172726511955261, "learning_rate": 0.01, "loss": 2.0706, "step": 10671 }, { "epoch": 1.0954433497536946, "grad_norm": 0.04941645637154579, "learning_rate": 0.01, "loss": 2.1018, "step": 10674 }, { "epoch": 1.0957512315270936, "grad_norm": 0.04746576398611069, "learning_rate": 0.01, "loss": 2.1002, "step": 10677 }, { "epoch": 1.0960591133004927, "grad_norm": 0.10900839418172836, "learning_rate": 0.01, "loss": 2.1188, "step": 10680 }, { "epoch": 1.0963669950738917, "grad_norm": 0.06924229860305786, "learning_rate": 0.01, "loss": 2.097, "step": 10683 }, { "epoch": 1.0966748768472907, "grad_norm": 0.11047599464654922, "learning_rate": 0.01, "loss": 2.0607, "step": 10686 }, { "epoch": 1.0969827586206897, "grad_norm": 0.10662158578634262, "learning_rate": 0.01, "loss": 2.078, "step": 10689 }, { "epoch": 1.0972906403940887, "grad_norm": 0.07408568263053894, "learning_rate": 0.01, "loss": 2.0918, "step": 10692 }, { "epoch": 1.0975985221674878, "grad_norm": 0.0471009686589241, "learning_rate": 0.01, "loss": 2.1248, "step": 10695 }, { "epoch": 1.0979064039408868, "grad_norm": 0.049591194838285446, "learning_rate": 0.01, "loss": 2.082, "step": 10698 }, { "epoch": 1.0982142857142858, "grad_norm": 0.0919683426618576, "learning_rate": 0.01, "loss": 2.1229, "step": 10701 }, { "epoch": 1.0985221674876848, "grad_norm": 0.05292963236570358, "learning_rate": 0.01, "loss": 2.1097, "step": 10704 }, { "epoch": 1.0988300492610836, "grad_norm": 0.053880974650382996, "learning_rate": 0.01, "loss": 2.0787, "step": 10707 }, { "epoch": 1.0991379310344827, "grad_norm": 0.05608196556568146, "learning_rate": 0.01, "loss": 2.0735, "step": 10710 }, { "epoch": 1.0994458128078817, "grad_norm": 0.06456641852855682, "learning_rate": 0.01, "loss": 2.1148, "step": 10713 }, { "epoch": 1.0997536945812807, "grad_norm": 0.08165917545557022, "learning_rate": 0.01, "loss": 2.1199, "step": 10716 }, { "epoch": 1.1000615763546797, "grad_norm": 0.0773044228553772, "learning_rate": 0.01, "loss": 2.0972, "step": 10719 }, { "epoch": 1.1003694581280787, "grad_norm": 0.07669848203659058, "learning_rate": 0.01, "loss": 2.101, "step": 10722 }, { "epoch": 1.1006773399014778, "grad_norm": 0.0773942843079567, "learning_rate": 0.01, "loss": 2.0573, "step": 10725 }, { "epoch": 1.1009852216748768, "grad_norm": 0.06698640435934067, "learning_rate": 0.01, "loss": 2.1189, "step": 10728 }, { "epoch": 1.1012931034482758, "grad_norm": 0.098200224339962, "learning_rate": 0.01, "loss": 2.0739, "step": 10731 }, { "epoch": 1.1016009852216748, "grad_norm": 0.06676481664180756, "learning_rate": 0.01, "loss": 2.097, "step": 10734 }, { "epoch": 1.1019088669950738, "grad_norm": 0.03925321251153946, "learning_rate": 0.01, "loss": 2.0904, "step": 10737 }, { "epoch": 1.1022167487684729, "grad_norm": 0.08387935161590576, "learning_rate": 0.01, "loss": 2.1069, "step": 10740 }, { "epoch": 1.1025246305418719, "grad_norm": 0.06382130831480026, "learning_rate": 0.01, "loss": 2.091, "step": 10743 }, { "epoch": 1.102832512315271, "grad_norm": 0.04457903653383255, "learning_rate": 0.01, "loss": 2.074, "step": 10746 }, { "epoch": 1.10314039408867, "grad_norm": 0.057858239859342575, "learning_rate": 0.01, "loss": 2.1021, "step": 10749 }, { "epoch": 1.103448275862069, "grad_norm": 0.055992983281612396, "learning_rate": 0.01, "loss": 2.0894, "step": 10752 }, { "epoch": 1.103756157635468, "grad_norm": 0.10200835764408112, "learning_rate": 0.01, "loss": 2.0948, "step": 10755 }, { "epoch": 1.104064039408867, "grad_norm": 0.11163626611232758, "learning_rate": 0.01, "loss": 2.0963, "step": 10758 }, { "epoch": 1.104371921182266, "grad_norm": 0.11462046951055527, "learning_rate": 0.01, "loss": 2.0808, "step": 10761 }, { "epoch": 1.104679802955665, "grad_norm": 0.08823121339082718, "learning_rate": 0.01, "loss": 2.1136, "step": 10764 }, { "epoch": 1.104987684729064, "grad_norm": 0.08843538910150528, "learning_rate": 0.01, "loss": 2.0767, "step": 10767 }, { "epoch": 1.105295566502463, "grad_norm": 0.05961614102125168, "learning_rate": 0.01, "loss": 2.1067, "step": 10770 }, { "epoch": 1.105603448275862, "grad_norm": 0.08095360547304153, "learning_rate": 0.01, "loss": 2.066, "step": 10773 }, { "epoch": 1.1059113300492611, "grad_norm": 0.08094312995672226, "learning_rate": 0.01, "loss": 2.0849, "step": 10776 }, { "epoch": 1.1062192118226601, "grad_norm": 0.05718453973531723, "learning_rate": 0.01, "loss": 2.1097, "step": 10779 }, { "epoch": 1.1065270935960592, "grad_norm": 0.0537499338388443, "learning_rate": 0.01, "loss": 2.082, "step": 10782 }, { "epoch": 1.1068349753694582, "grad_norm": 0.06437748670578003, "learning_rate": 0.01, "loss": 2.0982, "step": 10785 }, { "epoch": 1.1071428571428572, "grad_norm": 0.03420199081301689, "learning_rate": 0.01, "loss": 2.0919, "step": 10788 }, { "epoch": 1.1074507389162562, "grad_norm": 0.049510665237903595, "learning_rate": 0.01, "loss": 2.0777, "step": 10791 }, { "epoch": 1.1077586206896552, "grad_norm": 0.044145356863737106, "learning_rate": 0.01, "loss": 2.0994, "step": 10794 }, { "epoch": 1.1080665024630543, "grad_norm": 0.0494622103869915, "learning_rate": 0.01, "loss": 2.103, "step": 10797 }, { "epoch": 1.1083743842364533, "grad_norm": 0.039029188454151154, "learning_rate": 0.01, "loss": 2.1026, "step": 10800 }, { "epoch": 1.1086822660098523, "grad_norm": 0.05786842480301857, "learning_rate": 0.01, "loss": 2.0767, "step": 10803 }, { "epoch": 1.1089901477832513, "grad_norm": 0.07576561719179153, "learning_rate": 0.01, "loss": 2.1078, "step": 10806 }, { "epoch": 1.1092980295566504, "grad_norm": 0.084762342274189, "learning_rate": 0.01, "loss": 2.1027, "step": 10809 }, { "epoch": 1.1096059113300494, "grad_norm": 0.05042179673910141, "learning_rate": 0.01, "loss": 2.0742, "step": 10812 }, { "epoch": 1.1099137931034482, "grad_norm": 0.07194402068853378, "learning_rate": 0.01, "loss": 2.0985, "step": 10815 }, { "epoch": 1.1102216748768472, "grad_norm": 0.13966146111488342, "learning_rate": 0.01, "loss": 2.0924, "step": 10818 }, { "epoch": 1.1105295566502462, "grad_norm": 0.060582250356674194, "learning_rate": 0.01, "loss": 2.1039, "step": 10821 }, { "epoch": 1.1108374384236452, "grad_norm": 0.03663609176874161, "learning_rate": 0.01, "loss": 2.0731, "step": 10824 }, { "epoch": 1.1111453201970443, "grad_norm": 0.09468091279268265, "learning_rate": 0.01, "loss": 2.0961, "step": 10827 }, { "epoch": 1.1114532019704433, "grad_norm": 0.07199615240097046, "learning_rate": 0.01, "loss": 2.0834, "step": 10830 }, { "epoch": 1.1117610837438423, "grad_norm": 0.06624965369701385, "learning_rate": 0.01, "loss": 2.1286, "step": 10833 }, { "epoch": 1.1120689655172413, "grad_norm": 0.0414128340780735, "learning_rate": 0.01, "loss": 2.0922, "step": 10836 }, { "epoch": 1.1123768472906403, "grad_norm": 0.06416642665863037, "learning_rate": 0.01, "loss": 2.0908, "step": 10839 }, { "epoch": 1.1126847290640394, "grad_norm": 0.05309692397713661, "learning_rate": 0.01, "loss": 2.117, "step": 10842 }, { "epoch": 1.1129926108374384, "grad_norm": 0.04576392099261284, "learning_rate": 0.01, "loss": 2.0801, "step": 10845 }, { "epoch": 1.1133004926108374, "grad_norm": 0.0887250304222107, "learning_rate": 0.01, "loss": 2.0815, "step": 10848 }, { "epoch": 1.1136083743842364, "grad_norm": 0.061223480850458145, "learning_rate": 0.01, "loss": 2.0607, "step": 10851 }, { "epoch": 1.1139162561576355, "grad_norm": 0.12983545660972595, "learning_rate": 0.01, "loss": 2.0882, "step": 10854 }, { "epoch": 1.1142241379310345, "grad_norm": 0.09382637590169907, "learning_rate": 0.01, "loss": 2.0838, "step": 10857 }, { "epoch": 1.1145320197044335, "grad_norm": 0.04275491461157799, "learning_rate": 0.01, "loss": 2.0905, "step": 10860 }, { "epoch": 1.1148399014778325, "grad_norm": 0.044315680861473083, "learning_rate": 0.01, "loss": 2.0924, "step": 10863 }, { "epoch": 1.1151477832512315, "grad_norm": 0.05177663639187813, "learning_rate": 0.01, "loss": 2.0985, "step": 10866 }, { "epoch": 1.1154556650246306, "grad_norm": 0.08161107450723648, "learning_rate": 0.01, "loss": 2.105, "step": 10869 }, { "epoch": 1.1157635467980296, "grad_norm": 0.08273576200008392, "learning_rate": 0.01, "loss": 2.0991, "step": 10872 }, { "epoch": 1.1160714285714286, "grad_norm": 0.04973771795630455, "learning_rate": 0.01, "loss": 2.0849, "step": 10875 }, { "epoch": 1.1163793103448276, "grad_norm": 0.036696773022413254, "learning_rate": 0.01, "loss": 2.0651, "step": 10878 }, { "epoch": 1.1166871921182266, "grad_norm": 0.03647401183843613, "learning_rate": 0.01, "loss": 2.0772, "step": 10881 }, { "epoch": 1.1169950738916257, "grad_norm": 0.03360895812511444, "learning_rate": 0.01, "loss": 2.0952, "step": 10884 }, { "epoch": 1.1173029556650247, "grad_norm": 0.037918057292699814, "learning_rate": 0.01, "loss": 2.0776, "step": 10887 }, { "epoch": 1.1176108374384237, "grad_norm": 0.10544890910387039, "learning_rate": 0.01, "loss": 2.1079, "step": 10890 }, { "epoch": 1.1179187192118227, "grad_norm": 0.15091745555400848, "learning_rate": 0.01, "loss": 2.1231, "step": 10893 }, { "epoch": 1.1182266009852218, "grad_norm": 0.07386527210474014, "learning_rate": 0.01, "loss": 2.0922, "step": 10896 }, { "epoch": 1.1185344827586208, "grad_norm": 0.04889804869890213, "learning_rate": 0.01, "loss": 2.1016, "step": 10899 }, { "epoch": 1.1188423645320198, "grad_norm": 0.04805940017104149, "learning_rate": 0.01, "loss": 2.0833, "step": 10902 }, { "epoch": 1.1191502463054188, "grad_norm": 0.040073320269584656, "learning_rate": 0.01, "loss": 2.0943, "step": 10905 }, { "epoch": 1.1194581280788178, "grad_norm": 0.046124961227178574, "learning_rate": 0.01, "loss": 2.0891, "step": 10908 }, { "epoch": 1.1197660098522166, "grad_norm": 0.04982076957821846, "learning_rate": 0.01, "loss": 2.0595, "step": 10911 }, { "epoch": 1.1200738916256157, "grad_norm": 0.036569107323884964, "learning_rate": 0.01, "loss": 2.0602, "step": 10914 }, { "epoch": 1.1203817733990147, "grad_norm": 0.033519893884658813, "learning_rate": 0.01, "loss": 2.1026, "step": 10917 }, { "epoch": 1.1206896551724137, "grad_norm": 0.0513744130730629, "learning_rate": 0.01, "loss": 2.1119, "step": 10920 }, { "epoch": 1.1209975369458127, "grad_norm": 0.08677095174789429, "learning_rate": 0.01, "loss": 2.0791, "step": 10923 }, { "epoch": 1.1213054187192117, "grad_norm": 0.1263512223958969, "learning_rate": 0.01, "loss": 2.0912, "step": 10926 }, { "epoch": 1.1216133004926108, "grad_norm": 0.0737731009721756, "learning_rate": 0.01, "loss": 2.1193, "step": 10929 }, { "epoch": 1.1219211822660098, "grad_norm": 0.045122213661670685, "learning_rate": 0.01, "loss": 2.1029, "step": 10932 }, { "epoch": 1.1222290640394088, "grad_norm": 0.04616571217775345, "learning_rate": 0.01, "loss": 2.062, "step": 10935 }, { "epoch": 1.1225369458128078, "grad_norm": 0.03985420614480972, "learning_rate": 0.01, "loss": 2.0868, "step": 10938 }, { "epoch": 1.1228448275862069, "grad_norm": 0.11042526364326477, "learning_rate": 0.01, "loss": 2.1057, "step": 10941 }, { "epoch": 1.1231527093596059, "grad_norm": 0.08071359992027283, "learning_rate": 0.01, "loss": 2.0796, "step": 10944 }, { "epoch": 1.123460591133005, "grad_norm": 0.049534909427165985, "learning_rate": 0.01, "loss": 2.1055, "step": 10947 }, { "epoch": 1.123768472906404, "grad_norm": 0.08341135829687119, "learning_rate": 0.01, "loss": 2.0948, "step": 10950 }, { "epoch": 1.124076354679803, "grad_norm": 0.03842156007885933, "learning_rate": 0.01, "loss": 2.1051, "step": 10953 }, { "epoch": 1.124384236453202, "grad_norm": 0.04978267103433609, "learning_rate": 0.01, "loss": 2.0927, "step": 10956 }, { "epoch": 1.124692118226601, "grad_norm": 0.04545191302895546, "learning_rate": 0.01, "loss": 2.0847, "step": 10959 }, { "epoch": 1.125, "grad_norm": 0.10103368014097214, "learning_rate": 0.01, "loss": 2.105, "step": 10962 }, { "epoch": 1.125307881773399, "grad_norm": 0.05956938862800598, "learning_rate": 0.01, "loss": 2.1043, "step": 10965 }, { "epoch": 1.125615763546798, "grad_norm": 0.048797741532325745, "learning_rate": 0.01, "loss": 2.1044, "step": 10968 }, { "epoch": 1.125923645320197, "grad_norm": 0.041901495307683945, "learning_rate": 0.01, "loss": 2.0847, "step": 10971 }, { "epoch": 1.126231527093596, "grad_norm": 0.14950989186763763, "learning_rate": 0.01, "loss": 2.0919, "step": 10974 }, { "epoch": 1.126539408866995, "grad_norm": 0.049760669469833374, "learning_rate": 0.01, "loss": 2.0707, "step": 10977 }, { "epoch": 1.1268472906403941, "grad_norm": 0.07016187906265259, "learning_rate": 0.01, "loss": 2.0709, "step": 10980 }, { "epoch": 1.1271551724137931, "grad_norm": 0.057528458535671234, "learning_rate": 0.01, "loss": 2.0759, "step": 10983 }, { "epoch": 1.1274630541871922, "grad_norm": 0.06690733879804611, "learning_rate": 0.01, "loss": 2.102, "step": 10986 }, { "epoch": 1.1277709359605912, "grad_norm": 0.05225450173020363, "learning_rate": 0.01, "loss": 2.0675, "step": 10989 }, { "epoch": 1.1280788177339902, "grad_norm": 0.048363544046878815, "learning_rate": 0.01, "loss": 2.0634, "step": 10992 }, { "epoch": 1.1283866995073892, "grad_norm": 0.05356382206082344, "learning_rate": 0.01, "loss": 2.1003, "step": 10995 }, { "epoch": 1.1286945812807883, "grad_norm": 0.06921149045228958, "learning_rate": 0.01, "loss": 2.0934, "step": 10998 }, { "epoch": 1.1290024630541873, "grad_norm": 0.04210525006055832, "learning_rate": 0.01, "loss": 2.096, "step": 11001 }, { "epoch": 1.1293103448275863, "grad_norm": 0.11790584027767181, "learning_rate": 0.01, "loss": 2.0746, "step": 11004 }, { "epoch": 1.1296182266009853, "grad_norm": 0.08045307546854019, "learning_rate": 0.01, "loss": 2.0929, "step": 11007 }, { "epoch": 1.1299261083743843, "grad_norm": 0.10474243015050888, "learning_rate": 0.01, "loss": 2.1418, "step": 11010 }, { "epoch": 1.1302339901477834, "grad_norm": 0.06073759123682976, "learning_rate": 0.01, "loss": 2.1089, "step": 11013 }, { "epoch": 1.1305418719211824, "grad_norm": 0.057685475796461105, "learning_rate": 0.01, "loss": 2.0959, "step": 11016 }, { "epoch": 1.1308497536945814, "grad_norm": 0.04218476638197899, "learning_rate": 0.01, "loss": 2.0834, "step": 11019 }, { "epoch": 1.1311576354679802, "grad_norm": 0.04814853519201279, "learning_rate": 0.01, "loss": 2.1134, "step": 11022 }, { "epoch": 1.1314655172413792, "grad_norm": 0.1344536989927292, "learning_rate": 0.01, "loss": 2.1121, "step": 11025 }, { "epoch": 1.1317733990147782, "grad_norm": 0.057088855654001236, "learning_rate": 0.01, "loss": 2.0978, "step": 11028 }, { "epoch": 1.1320812807881773, "grad_norm": 0.04567364603281021, "learning_rate": 0.01, "loss": 2.0837, "step": 11031 }, { "epoch": 1.1323891625615763, "grad_norm": 0.07506916671991348, "learning_rate": 0.01, "loss": 2.0926, "step": 11034 }, { "epoch": 1.1326970443349753, "grad_norm": 0.05837171897292137, "learning_rate": 0.01, "loss": 2.0961, "step": 11037 }, { "epoch": 1.1330049261083743, "grad_norm": 0.0457015223801136, "learning_rate": 0.01, "loss": 2.101, "step": 11040 }, { "epoch": 1.1333128078817734, "grad_norm": 0.061310991644859314, "learning_rate": 0.01, "loss": 2.1128, "step": 11043 }, { "epoch": 1.1336206896551724, "grad_norm": 0.05517786741256714, "learning_rate": 0.01, "loss": 2.0844, "step": 11046 }, { "epoch": 1.1339285714285714, "grad_norm": 0.07835637778043747, "learning_rate": 0.01, "loss": 2.0996, "step": 11049 }, { "epoch": 1.1342364532019704, "grad_norm": 0.05821641907095909, "learning_rate": 0.01, "loss": 2.1074, "step": 11052 }, { "epoch": 1.1345443349753694, "grad_norm": 0.04394884407520294, "learning_rate": 0.01, "loss": 2.0799, "step": 11055 }, { "epoch": 1.1348522167487685, "grad_norm": 0.05148720741271973, "learning_rate": 0.01, "loss": 2.0856, "step": 11058 }, { "epoch": 1.1351600985221675, "grad_norm": 0.05766841769218445, "learning_rate": 0.01, "loss": 2.0973, "step": 11061 }, { "epoch": 1.1354679802955665, "grad_norm": 0.09894710779190063, "learning_rate": 0.01, "loss": 2.0831, "step": 11064 }, { "epoch": 1.1357758620689655, "grad_norm": 0.11916875094175339, "learning_rate": 0.01, "loss": 2.1044, "step": 11067 }, { "epoch": 1.1360837438423645, "grad_norm": 0.03926829248666763, "learning_rate": 0.01, "loss": 2.0866, "step": 11070 }, { "epoch": 1.1363916256157636, "grad_norm": 0.05105220153927803, "learning_rate": 0.01, "loss": 2.0911, "step": 11073 }, { "epoch": 1.1366995073891626, "grad_norm": 0.04516123607754707, "learning_rate": 0.01, "loss": 2.0693, "step": 11076 }, { "epoch": 1.1370073891625616, "grad_norm": 0.046173594892024994, "learning_rate": 0.01, "loss": 2.092, "step": 11079 }, { "epoch": 1.1373152709359606, "grad_norm": 0.05173357576131821, "learning_rate": 0.01, "loss": 2.1007, "step": 11082 }, { "epoch": 1.1376231527093597, "grad_norm": 0.06486919522285461, "learning_rate": 0.01, "loss": 2.0775, "step": 11085 }, { "epoch": 1.1379310344827587, "grad_norm": 0.09763675928115845, "learning_rate": 0.01, "loss": 2.0942, "step": 11088 }, { "epoch": 1.1382389162561577, "grad_norm": 0.1281820833683014, "learning_rate": 0.01, "loss": 2.0903, "step": 11091 }, { "epoch": 1.1385467980295567, "grad_norm": 0.05734977498650551, "learning_rate": 0.01, "loss": 2.0891, "step": 11094 }, { "epoch": 1.1388546798029557, "grad_norm": 0.06809762120246887, "learning_rate": 0.01, "loss": 2.0855, "step": 11097 }, { "epoch": 1.1391625615763548, "grad_norm": 0.05105281621217728, "learning_rate": 0.01, "loss": 2.0975, "step": 11100 }, { "epoch": 1.1394704433497538, "grad_norm": 0.07381090521812439, "learning_rate": 0.01, "loss": 2.0762, "step": 11103 }, { "epoch": 1.1397783251231528, "grad_norm": 0.050722070038318634, "learning_rate": 0.01, "loss": 2.0778, "step": 11106 }, { "epoch": 1.1400862068965516, "grad_norm": 0.03850618377327919, "learning_rate": 0.01, "loss": 2.0626, "step": 11109 }, { "epoch": 1.1403940886699506, "grad_norm": 0.08264841884374619, "learning_rate": 0.01, "loss": 2.1258, "step": 11112 }, { "epoch": 1.1407019704433496, "grad_norm": 0.06493505835533142, "learning_rate": 0.01, "loss": 2.0961, "step": 11115 }, { "epoch": 1.1410098522167487, "grad_norm": 0.06895186007022858, "learning_rate": 0.01, "loss": 2.1207, "step": 11118 }, { "epoch": 1.1413177339901477, "grad_norm": 0.042232003062963486, "learning_rate": 0.01, "loss": 2.0724, "step": 11121 }, { "epoch": 1.1416256157635467, "grad_norm": 0.10296539217233658, "learning_rate": 0.01, "loss": 2.1126, "step": 11124 }, { "epoch": 1.1419334975369457, "grad_norm": 0.043095991015434265, "learning_rate": 0.01, "loss": 2.0928, "step": 11127 }, { "epoch": 1.1422413793103448, "grad_norm": 0.046020470559597015, "learning_rate": 0.01, "loss": 2.092, "step": 11130 }, { "epoch": 1.1425492610837438, "grad_norm": 0.04754204675555229, "learning_rate": 0.01, "loss": 2.126, "step": 11133 }, { "epoch": 1.1428571428571428, "grad_norm": 0.03732014447450638, "learning_rate": 0.01, "loss": 2.0637, "step": 11136 }, { "epoch": 1.1431650246305418, "grad_norm": 0.039080169051885605, "learning_rate": 0.01, "loss": 2.0877, "step": 11139 }, { "epoch": 1.1434729064039408, "grad_norm": 0.04575611278414726, "learning_rate": 0.01, "loss": 2.0646, "step": 11142 }, { "epoch": 1.1437807881773399, "grad_norm": 0.11764515191316605, "learning_rate": 0.01, "loss": 2.0964, "step": 11145 }, { "epoch": 1.1440886699507389, "grad_norm": 0.10087098181247711, "learning_rate": 0.01, "loss": 2.069, "step": 11148 }, { "epoch": 1.144396551724138, "grad_norm": 0.05462269112467766, "learning_rate": 0.01, "loss": 2.0902, "step": 11151 }, { "epoch": 1.144704433497537, "grad_norm": 0.06296168267726898, "learning_rate": 0.01, "loss": 2.1257, "step": 11154 }, { "epoch": 1.145012315270936, "grad_norm": 0.041026949882507324, "learning_rate": 0.01, "loss": 2.0672, "step": 11157 }, { "epoch": 1.145320197044335, "grad_norm": 0.05761269852519035, "learning_rate": 0.01, "loss": 2.0833, "step": 11160 }, { "epoch": 1.145628078817734, "grad_norm": 0.12491103261709213, "learning_rate": 0.01, "loss": 2.0681, "step": 11163 }, { "epoch": 1.145935960591133, "grad_norm": 0.09269531071186066, "learning_rate": 0.01, "loss": 2.0618, "step": 11166 }, { "epoch": 1.146243842364532, "grad_norm": 0.05623659864068031, "learning_rate": 0.01, "loss": 2.0861, "step": 11169 }, { "epoch": 1.146551724137931, "grad_norm": 0.04075248911976814, "learning_rate": 0.01, "loss": 2.0513, "step": 11172 }, { "epoch": 1.14685960591133, "grad_norm": 0.04557061940431595, "learning_rate": 0.01, "loss": 2.0876, "step": 11175 }, { "epoch": 1.147167487684729, "grad_norm": 0.05686535686254501, "learning_rate": 0.01, "loss": 2.0746, "step": 11178 }, { "epoch": 1.1474753694581281, "grad_norm": 0.04164785146713257, "learning_rate": 0.01, "loss": 2.0684, "step": 11181 }, { "epoch": 1.1477832512315271, "grad_norm": 0.05453825742006302, "learning_rate": 0.01, "loss": 2.0809, "step": 11184 }, { "epoch": 1.1480911330049262, "grad_norm": 0.15215769410133362, "learning_rate": 0.01, "loss": 2.0806, "step": 11187 }, { "epoch": 1.1483990147783252, "grad_norm": 0.14634385704994202, "learning_rate": 0.01, "loss": 2.11, "step": 11190 }, { "epoch": 1.1487068965517242, "grad_norm": 0.06655893474817276, "learning_rate": 0.01, "loss": 2.0953, "step": 11193 }, { "epoch": 1.1490147783251232, "grad_norm": 0.07074826955795288, "learning_rate": 0.01, "loss": 2.0984, "step": 11196 }, { "epoch": 1.1493226600985222, "grad_norm": 0.044581200927495956, "learning_rate": 0.01, "loss": 2.0816, "step": 11199 }, { "epoch": 1.1496305418719213, "grad_norm": 0.06769565492868423, "learning_rate": 0.01, "loss": 2.0809, "step": 11202 }, { "epoch": 1.1499384236453203, "grad_norm": 0.11437363177537918, "learning_rate": 0.01, "loss": 2.0761, "step": 11205 }, { "epoch": 1.1502463054187193, "grad_norm": 0.040699586272239685, "learning_rate": 0.01, "loss": 2.1028, "step": 11208 }, { "epoch": 1.1505541871921183, "grad_norm": 0.06996385753154755, "learning_rate": 0.01, "loss": 2.0755, "step": 11211 }, { "epoch": 1.1508620689655173, "grad_norm": 0.04621044918894768, "learning_rate": 0.01, "loss": 2.11, "step": 11214 }, { "epoch": 1.1511699507389164, "grad_norm": 0.07714984565973282, "learning_rate": 0.01, "loss": 2.1008, "step": 11217 }, { "epoch": 1.1514778325123154, "grad_norm": 0.11194620281457901, "learning_rate": 0.01, "loss": 2.093, "step": 11220 }, { "epoch": 1.1517857142857142, "grad_norm": 0.08710391074419022, "learning_rate": 0.01, "loss": 2.1017, "step": 11223 }, { "epoch": 1.1520935960591132, "grad_norm": 0.06653989106416702, "learning_rate": 0.01, "loss": 2.1084, "step": 11226 }, { "epoch": 1.1524014778325122, "grad_norm": 0.056591540575027466, "learning_rate": 0.01, "loss": 2.107, "step": 11229 }, { "epoch": 1.1527093596059113, "grad_norm": 0.056475669145584106, "learning_rate": 0.01, "loss": 2.0774, "step": 11232 }, { "epoch": 1.1530172413793103, "grad_norm": 0.08408259600400925, "learning_rate": 0.01, "loss": 2.1193, "step": 11235 }, { "epoch": 1.1533251231527093, "grad_norm": 0.06853178143501282, "learning_rate": 0.01, "loss": 2.0796, "step": 11238 }, { "epoch": 1.1536330049261083, "grad_norm": 0.11699818074703217, "learning_rate": 0.01, "loss": 2.0932, "step": 11241 }, { "epoch": 1.1539408866995073, "grad_norm": 0.06365542113780975, "learning_rate": 0.01, "loss": 2.079, "step": 11244 }, { "epoch": 1.1542487684729064, "grad_norm": 0.040505316108465195, "learning_rate": 0.01, "loss": 2.0688, "step": 11247 }, { "epoch": 1.1545566502463054, "grad_norm": 0.06958888471126556, "learning_rate": 0.01, "loss": 2.0882, "step": 11250 }, { "epoch": 1.1548645320197044, "grad_norm": 0.08968232572078705, "learning_rate": 0.01, "loss": 2.064, "step": 11253 }, { "epoch": 1.1551724137931034, "grad_norm": 0.05143412947654724, "learning_rate": 0.01, "loss": 2.0682, "step": 11256 }, { "epoch": 1.1554802955665024, "grad_norm": 0.1219927966594696, "learning_rate": 0.01, "loss": 2.1038, "step": 11259 }, { "epoch": 1.1557881773399015, "grad_norm": 0.11974184960126877, "learning_rate": 0.01, "loss": 2.1099, "step": 11262 }, { "epoch": 1.1560960591133005, "grad_norm": 0.09557066112756729, "learning_rate": 0.01, "loss": 2.0976, "step": 11265 }, { "epoch": 1.1564039408866995, "grad_norm": 0.10169565677642822, "learning_rate": 0.01, "loss": 2.0904, "step": 11268 }, { "epoch": 1.1567118226600985, "grad_norm": 0.0586596317589283, "learning_rate": 0.01, "loss": 2.0787, "step": 11271 }, { "epoch": 1.1570197044334976, "grad_norm": 0.0423048697412014, "learning_rate": 0.01, "loss": 2.1084, "step": 11274 }, { "epoch": 1.1573275862068966, "grad_norm": 0.07394707947969437, "learning_rate": 0.01, "loss": 2.0802, "step": 11277 }, { "epoch": 1.1576354679802956, "grad_norm": 0.05507487431168556, "learning_rate": 0.01, "loss": 2.0777, "step": 11280 }, { "epoch": 1.1579433497536946, "grad_norm": 0.03251083940267563, "learning_rate": 0.01, "loss": 2.1128, "step": 11283 }, { "epoch": 1.1582512315270936, "grad_norm": 0.04480468109250069, "learning_rate": 0.01, "loss": 2.1162, "step": 11286 }, { "epoch": 1.1585591133004927, "grad_norm": 0.08275749534368515, "learning_rate": 0.01, "loss": 2.0808, "step": 11289 }, { "epoch": 1.1588669950738917, "grad_norm": 0.08199379593133926, "learning_rate": 0.01, "loss": 2.1006, "step": 11292 }, { "epoch": 1.1591748768472907, "grad_norm": 0.1015235111117363, "learning_rate": 0.01, "loss": 2.0926, "step": 11295 }, { "epoch": 1.1594827586206897, "grad_norm": 0.08872919529676437, "learning_rate": 0.01, "loss": 2.0739, "step": 11298 }, { "epoch": 1.1597906403940887, "grad_norm": 0.04640579596161842, "learning_rate": 0.01, "loss": 2.09, "step": 11301 }, { "epoch": 1.1600985221674878, "grad_norm": 0.044142261147499084, "learning_rate": 0.01, "loss": 2.115, "step": 11304 }, { "epoch": 1.1604064039408868, "grad_norm": 0.1030762568116188, "learning_rate": 0.01, "loss": 2.1358, "step": 11307 }, { "epoch": 1.1607142857142858, "grad_norm": 0.06712359189987183, "learning_rate": 0.01, "loss": 2.0504, "step": 11310 }, { "epoch": 1.1610221674876846, "grad_norm": 0.05579240992665291, "learning_rate": 0.01, "loss": 2.0915, "step": 11313 }, { "epoch": 1.1613300492610836, "grad_norm": 0.04237228259444237, "learning_rate": 0.01, "loss": 2.0514, "step": 11316 }, { "epoch": 1.1616379310344827, "grad_norm": 0.08990642428398132, "learning_rate": 0.01, "loss": 2.0753, "step": 11319 }, { "epoch": 1.1619458128078817, "grad_norm": 0.09788185358047485, "learning_rate": 0.01, "loss": 2.0907, "step": 11322 }, { "epoch": 1.1622536945812807, "grad_norm": 0.07207074761390686, "learning_rate": 0.01, "loss": 2.0858, "step": 11325 }, { "epoch": 1.1625615763546797, "grad_norm": 0.07704904675483704, "learning_rate": 0.01, "loss": 2.0938, "step": 11328 }, { "epoch": 1.1628694581280787, "grad_norm": 0.07003269344568253, "learning_rate": 0.01, "loss": 2.1031, "step": 11331 }, { "epoch": 1.1631773399014778, "grad_norm": 0.05584646388888359, "learning_rate": 0.01, "loss": 2.0852, "step": 11334 }, { "epoch": 1.1634852216748768, "grad_norm": 0.10223853588104248, "learning_rate": 0.01, "loss": 2.072, "step": 11337 }, { "epoch": 1.1637931034482758, "grad_norm": 0.13336369395256042, "learning_rate": 0.01, "loss": 2.0902, "step": 11340 }, { "epoch": 1.1641009852216748, "grad_norm": 0.04471458122134209, "learning_rate": 0.01, "loss": 2.0884, "step": 11343 }, { "epoch": 1.1644088669950738, "grad_norm": 0.04342315346002579, "learning_rate": 0.01, "loss": 2.0885, "step": 11346 }, { "epoch": 1.1647167487684729, "grad_norm": 0.05172869563102722, "learning_rate": 0.01, "loss": 2.069, "step": 11349 }, { "epoch": 1.1650246305418719, "grad_norm": 0.04304314777255058, "learning_rate": 0.01, "loss": 2.0915, "step": 11352 }, { "epoch": 1.165332512315271, "grad_norm": 0.14592792093753815, "learning_rate": 0.01, "loss": 2.0841, "step": 11355 }, { "epoch": 1.16564039408867, "grad_norm": 0.05238402634859085, "learning_rate": 0.01, "loss": 2.0772, "step": 11358 }, { "epoch": 1.165948275862069, "grad_norm": 0.04800669103860855, "learning_rate": 0.01, "loss": 2.0739, "step": 11361 }, { "epoch": 1.166256157635468, "grad_norm": 0.06648313999176025, "learning_rate": 0.01, "loss": 2.0956, "step": 11364 }, { "epoch": 1.166564039408867, "grad_norm": 0.03402326628565788, "learning_rate": 0.01, "loss": 2.0847, "step": 11367 }, { "epoch": 1.166871921182266, "grad_norm": 0.05076766759157181, "learning_rate": 0.01, "loss": 2.0833, "step": 11370 }, { "epoch": 1.167179802955665, "grad_norm": 0.07221470773220062, "learning_rate": 0.01, "loss": 2.0778, "step": 11373 }, { "epoch": 1.167487684729064, "grad_norm": 0.04556736722588539, "learning_rate": 0.01, "loss": 2.0957, "step": 11376 }, { "epoch": 1.167795566502463, "grad_norm": 0.03702834993600845, "learning_rate": 0.01, "loss": 2.0843, "step": 11379 }, { "epoch": 1.168103448275862, "grad_norm": 0.046527571976184845, "learning_rate": 0.01, "loss": 2.084, "step": 11382 }, { "epoch": 1.1684113300492611, "grad_norm": 0.09520363062620163, "learning_rate": 0.01, "loss": 2.0924, "step": 11385 }, { "epoch": 1.1687192118226601, "grad_norm": 0.12759263813495636, "learning_rate": 0.01, "loss": 2.1265, "step": 11388 }, { "epoch": 1.1690270935960592, "grad_norm": 0.03981192037463188, "learning_rate": 0.01, "loss": 2.076, "step": 11391 }, { "epoch": 1.1693349753694582, "grad_norm": 0.04739897698163986, "learning_rate": 0.01, "loss": 2.1013, "step": 11394 }, { "epoch": 1.1696428571428572, "grad_norm": 0.04937390610575676, "learning_rate": 0.01, "loss": 2.0832, "step": 11397 }, { "epoch": 1.1699507389162562, "grad_norm": 0.07097122073173523, "learning_rate": 0.01, "loss": 2.0699, "step": 11400 }, { "epoch": 1.1702586206896552, "grad_norm": 0.07773783802986145, "learning_rate": 0.01, "loss": 2.1215, "step": 11403 }, { "epoch": 1.1705665024630543, "grad_norm": 0.04591994732618332, "learning_rate": 0.01, "loss": 2.0657, "step": 11406 }, { "epoch": 1.1708743842364533, "grad_norm": 0.08724237233400345, "learning_rate": 0.01, "loss": 2.0817, "step": 11409 }, { "epoch": 1.1711822660098523, "grad_norm": 0.06528041511774063, "learning_rate": 0.01, "loss": 2.0962, "step": 11412 }, { "epoch": 1.1714901477832513, "grad_norm": 0.0660424679517746, "learning_rate": 0.01, "loss": 2.0904, "step": 11415 }, { "epoch": 1.1717980295566504, "grad_norm": 0.08304266631603241, "learning_rate": 0.01, "loss": 2.0912, "step": 11418 }, { "epoch": 1.1721059113300494, "grad_norm": 0.05073266103863716, "learning_rate": 0.01, "loss": 2.0747, "step": 11421 }, { "epoch": 1.1724137931034484, "grad_norm": 0.07305736094713211, "learning_rate": 0.01, "loss": 2.0852, "step": 11424 }, { "epoch": 1.1727216748768472, "grad_norm": 0.09365279227495193, "learning_rate": 0.01, "loss": 2.0883, "step": 11427 }, { "epoch": 1.1730295566502462, "grad_norm": 0.12279067188501358, "learning_rate": 0.01, "loss": 2.0999, "step": 11430 }, { "epoch": 1.1733374384236452, "grad_norm": 0.0589769072830677, "learning_rate": 0.01, "loss": 2.0948, "step": 11433 }, { "epoch": 1.1736453201970443, "grad_norm": 0.06621567159891129, "learning_rate": 0.01, "loss": 2.0823, "step": 11436 }, { "epoch": 1.1739532019704433, "grad_norm": 0.051341816782951355, "learning_rate": 0.01, "loss": 2.0622, "step": 11439 }, { "epoch": 1.1742610837438423, "grad_norm": 0.06027314066886902, "learning_rate": 0.01, "loss": 2.0798, "step": 11442 }, { "epoch": 1.1745689655172413, "grad_norm": 0.10131573677062988, "learning_rate": 0.01, "loss": 2.0882, "step": 11445 }, { "epoch": 1.1748768472906403, "grad_norm": 0.08082377910614014, "learning_rate": 0.01, "loss": 2.0949, "step": 11448 }, { "epoch": 1.1751847290640394, "grad_norm": 0.07095243781805038, "learning_rate": 0.01, "loss": 2.0655, "step": 11451 }, { "epoch": 1.1754926108374384, "grad_norm": 0.07132910192012787, "learning_rate": 0.01, "loss": 2.0903, "step": 11454 }, { "epoch": 1.1758004926108374, "grad_norm": 0.10488838702440262, "learning_rate": 0.01, "loss": 2.0639, "step": 11457 }, { "epoch": 1.1761083743842364, "grad_norm": 0.12755680084228516, "learning_rate": 0.01, "loss": 2.0989, "step": 11460 }, { "epoch": 1.1764162561576355, "grad_norm": 0.12174911797046661, "learning_rate": 0.01, "loss": 2.1026, "step": 11463 }, { "epoch": 1.1767241379310345, "grad_norm": 0.07873964309692383, "learning_rate": 0.01, "loss": 2.0908, "step": 11466 }, { "epoch": 1.1770320197044335, "grad_norm": 0.04275409132242203, "learning_rate": 0.01, "loss": 2.0899, "step": 11469 }, { "epoch": 1.1773399014778325, "grad_norm": 0.046134103089571, "learning_rate": 0.01, "loss": 2.1064, "step": 11472 }, { "epoch": 1.1776477832512315, "grad_norm": 0.07631804049015045, "learning_rate": 0.01, "loss": 2.0811, "step": 11475 }, { "epoch": 1.1779556650246306, "grad_norm": 0.04843062162399292, "learning_rate": 0.01, "loss": 2.1078, "step": 11478 }, { "epoch": 1.1782635467980296, "grad_norm": 0.04664747416973114, "learning_rate": 0.01, "loss": 2.0807, "step": 11481 }, { "epoch": 1.1785714285714286, "grad_norm": 0.042328983545303345, "learning_rate": 0.01, "loss": 2.0898, "step": 11484 }, { "epoch": 1.1788793103448276, "grad_norm": 0.04443054646253586, "learning_rate": 0.01, "loss": 2.092, "step": 11487 }, { "epoch": 1.1791871921182266, "grad_norm": 0.03439139202237129, "learning_rate": 0.01, "loss": 2.0438, "step": 11490 }, { "epoch": 1.1794950738916257, "grad_norm": 0.1651001274585724, "learning_rate": 0.01, "loss": 2.1068, "step": 11493 }, { "epoch": 1.1798029556650247, "grad_norm": 0.04535198211669922, "learning_rate": 0.01, "loss": 2.0951, "step": 11496 }, { "epoch": 1.1801108374384237, "grad_norm": 0.04346736520528793, "learning_rate": 0.01, "loss": 2.1032, "step": 11499 }, { "epoch": 1.1804187192118227, "grad_norm": 0.08131600171327591, "learning_rate": 0.01, "loss": 2.0709, "step": 11502 }, { "epoch": 1.1807266009852218, "grad_norm": 0.0638989582657814, "learning_rate": 0.01, "loss": 2.0676, "step": 11505 }, { "epoch": 1.1810344827586208, "grad_norm": 0.06305757910013199, "learning_rate": 0.01, "loss": 2.0636, "step": 11508 }, { "epoch": 1.1813423645320198, "grad_norm": 0.048068735748529434, "learning_rate": 0.01, "loss": 2.0803, "step": 11511 }, { "epoch": 1.1816502463054186, "grad_norm": 0.1859566867351532, "learning_rate": 0.01, "loss": 2.1206, "step": 11514 }, { "epoch": 1.1819581280788176, "grad_norm": 0.13449640572071075, "learning_rate": 0.01, "loss": 2.1047, "step": 11517 }, { "epoch": 1.1822660098522166, "grad_norm": 0.09624927490949631, "learning_rate": 0.01, "loss": 2.1338, "step": 11520 }, { "epoch": 1.1825738916256157, "grad_norm": 0.04823729023337364, "learning_rate": 0.01, "loss": 2.1485, "step": 11523 }, { "epoch": 1.1828817733990147, "grad_norm": 0.04005538672208786, "learning_rate": 0.01, "loss": 2.1062, "step": 11526 }, { "epoch": 1.1831896551724137, "grad_norm": 0.035647980868816376, "learning_rate": 0.01, "loss": 2.1002, "step": 11529 }, { "epoch": 1.1834975369458127, "grad_norm": 0.03485687077045441, "learning_rate": 0.01, "loss": 2.085, "step": 11532 }, { "epoch": 1.1838054187192117, "grad_norm": 0.03271855041384697, "learning_rate": 0.01, "loss": 2.1083, "step": 11535 }, { "epoch": 1.1841133004926108, "grad_norm": 0.14434824883937836, "learning_rate": 0.01, "loss": 2.0917, "step": 11538 }, { "epoch": 1.1844211822660098, "grad_norm": 0.16373054683208466, "learning_rate": 0.01, "loss": 2.0889, "step": 11541 }, { "epoch": 1.1847290640394088, "grad_norm": 0.1128426045179367, "learning_rate": 0.01, "loss": 2.0605, "step": 11544 }, { "epoch": 1.1850369458128078, "grad_norm": 0.03807492554187775, "learning_rate": 0.01, "loss": 2.0792, "step": 11547 }, { "epoch": 1.1853448275862069, "grad_norm": 0.0678490698337555, "learning_rate": 0.01, "loss": 2.072, "step": 11550 }, { "epoch": 1.1856527093596059, "grad_norm": 0.04688851907849312, "learning_rate": 0.01, "loss": 2.0648, "step": 11553 }, { "epoch": 1.185960591133005, "grad_norm": 0.03682132810354233, "learning_rate": 0.01, "loss": 2.096, "step": 11556 }, { "epoch": 1.186268472906404, "grad_norm": 0.0797944962978363, "learning_rate": 0.01, "loss": 2.0632, "step": 11559 }, { "epoch": 1.186576354679803, "grad_norm": 0.09593506157398224, "learning_rate": 0.01, "loss": 2.0923, "step": 11562 }, { "epoch": 1.186884236453202, "grad_norm": 0.10455387830734253, "learning_rate": 0.01, "loss": 2.0927, "step": 11565 }, { "epoch": 1.187192118226601, "grad_norm": 0.05642642080783844, "learning_rate": 0.01, "loss": 2.0688, "step": 11568 }, { "epoch": 1.1875, "grad_norm": 0.09467128664255142, "learning_rate": 0.01, "loss": 2.0561, "step": 11571 }, { "epoch": 1.187807881773399, "grad_norm": 0.061598166823387146, "learning_rate": 0.01, "loss": 2.0666, "step": 11574 }, { "epoch": 1.188115763546798, "grad_norm": 0.0875246673822403, "learning_rate": 0.01, "loss": 2.0753, "step": 11577 }, { "epoch": 1.188423645320197, "grad_norm": 0.05889583006501198, "learning_rate": 0.01, "loss": 2.0705, "step": 11580 }, { "epoch": 1.188731527093596, "grad_norm": 0.0796559602022171, "learning_rate": 0.01, "loss": 2.0939, "step": 11583 }, { "epoch": 1.189039408866995, "grad_norm": 0.04127117991447449, "learning_rate": 0.01, "loss": 2.0905, "step": 11586 }, { "epoch": 1.1893472906403941, "grad_norm": 0.06161842495203018, "learning_rate": 0.01, "loss": 2.0954, "step": 11589 }, { "epoch": 1.1896551724137931, "grad_norm": 0.05344879627227783, "learning_rate": 0.01, "loss": 2.0808, "step": 11592 }, { "epoch": 1.1899630541871922, "grad_norm": 0.03660701587796211, "learning_rate": 0.01, "loss": 2.0748, "step": 11595 }, { "epoch": 1.1902709359605912, "grad_norm": 0.04792351648211479, "learning_rate": 0.01, "loss": 2.0833, "step": 11598 }, { "epoch": 1.1905788177339902, "grad_norm": 0.04336618259549141, "learning_rate": 0.01, "loss": 2.1017, "step": 11601 }, { "epoch": 1.1908866995073892, "grad_norm": 0.0654226765036583, "learning_rate": 0.01, "loss": 2.0922, "step": 11604 }, { "epoch": 1.1911945812807883, "grad_norm": 0.08879897743463516, "learning_rate": 0.01, "loss": 2.0842, "step": 11607 }, { "epoch": 1.1915024630541873, "grad_norm": 0.15568268299102783, "learning_rate": 0.01, "loss": 2.0947, "step": 11610 }, { "epoch": 1.1918103448275863, "grad_norm": 0.11712448298931122, "learning_rate": 0.01, "loss": 2.0955, "step": 11613 }, { "epoch": 1.1921182266009853, "grad_norm": 0.04966702312231064, "learning_rate": 0.01, "loss": 2.1093, "step": 11616 }, { "epoch": 1.1924261083743843, "grad_norm": 0.04304838925600052, "learning_rate": 0.01, "loss": 2.0828, "step": 11619 }, { "epoch": 1.1927339901477834, "grad_norm": 0.04981999844312668, "learning_rate": 0.01, "loss": 2.0964, "step": 11622 }, { "epoch": 1.1930418719211824, "grad_norm": 0.045383159071207047, "learning_rate": 0.01, "loss": 2.0967, "step": 11625 }, { "epoch": 1.1933497536945814, "grad_norm": 0.0348484069108963, "learning_rate": 0.01, "loss": 2.0685, "step": 11628 }, { "epoch": 1.1936576354679802, "grad_norm": 0.04802081733942032, "learning_rate": 0.01, "loss": 2.0904, "step": 11631 }, { "epoch": 1.1939655172413792, "grad_norm": 0.0615711510181427, "learning_rate": 0.01, "loss": 2.1153, "step": 11634 }, { "epoch": 1.1942733990147782, "grad_norm": 0.15608100593090057, "learning_rate": 0.01, "loss": 2.0753, "step": 11637 }, { "epoch": 1.1945812807881773, "grad_norm": 0.10449741780757904, "learning_rate": 0.01, "loss": 2.1083, "step": 11640 }, { "epoch": 1.1948891625615763, "grad_norm": 0.062145963311195374, "learning_rate": 0.01, "loss": 2.1104, "step": 11643 }, { "epoch": 1.1951970443349753, "grad_norm": 0.04744469001889229, "learning_rate": 0.01, "loss": 2.0742, "step": 11646 }, { "epoch": 1.1955049261083743, "grad_norm": 0.036814477294683456, "learning_rate": 0.01, "loss": 2.0641, "step": 11649 }, { "epoch": 1.1958128078817734, "grad_norm": 0.037870246917009354, "learning_rate": 0.01, "loss": 2.0906, "step": 11652 }, { "epoch": 1.1961206896551724, "grad_norm": 0.17372412979602814, "learning_rate": 0.01, "loss": 2.1027, "step": 11655 }, { "epoch": 1.1964285714285714, "grad_norm": 0.04681265726685524, "learning_rate": 0.01, "loss": 2.1029, "step": 11658 }, { "epoch": 1.1967364532019704, "grad_norm": 0.058284103870391846, "learning_rate": 0.01, "loss": 2.0829, "step": 11661 }, { "epoch": 1.1970443349753694, "grad_norm": 0.07531574368476868, "learning_rate": 0.01, "loss": 2.0602, "step": 11664 }, { "epoch": 1.1973522167487685, "grad_norm": 0.053437430411577225, "learning_rate": 0.01, "loss": 2.0726, "step": 11667 }, { "epoch": 1.1976600985221675, "grad_norm": 0.047438427805900574, "learning_rate": 0.01, "loss": 2.107, "step": 11670 }, { "epoch": 1.1979679802955665, "grad_norm": 0.04474404826760292, "learning_rate": 0.01, "loss": 2.08, "step": 11673 }, { "epoch": 1.1982758620689655, "grad_norm": 0.15452256798744202, "learning_rate": 0.01, "loss": 2.0788, "step": 11676 }, { "epoch": 1.1985837438423645, "grad_norm": 0.05446213111281395, "learning_rate": 0.01, "loss": 2.0696, "step": 11679 }, { "epoch": 1.1988916256157636, "grad_norm": 0.059178926050662994, "learning_rate": 0.01, "loss": 2.0867, "step": 11682 }, { "epoch": 1.1991995073891626, "grad_norm": 0.05807918682694435, "learning_rate": 0.01, "loss": 2.0675, "step": 11685 }, { "epoch": 1.1995073891625616, "grad_norm": 0.046843890100717545, "learning_rate": 0.01, "loss": 2.0729, "step": 11688 }, { "epoch": 1.1998152709359606, "grad_norm": 0.042494870722293854, "learning_rate": 0.01, "loss": 2.079, "step": 11691 }, { "epoch": 1.2001231527093597, "grad_norm": 0.04506772756576538, "learning_rate": 0.01, "loss": 2.0862, "step": 11694 }, { "epoch": 1.2004310344827587, "grad_norm": 0.04237942770123482, "learning_rate": 0.01, "loss": 2.0712, "step": 11697 }, { "epoch": 1.2007389162561577, "grad_norm": 0.03488307446241379, "learning_rate": 0.01, "loss": 2.1051, "step": 11700 }, { "epoch": 1.2010467980295567, "grad_norm": 0.03693482652306557, "learning_rate": 0.01, "loss": 2.045, "step": 11703 }, { "epoch": 1.2013546798029557, "grad_norm": 0.10410935431718826, "learning_rate": 0.01, "loss": 2.0884, "step": 11706 }, { "epoch": 1.2016625615763548, "grad_norm": 0.11816459894180298, "learning_rate": 0.01, "loss": 2.0687, "step": 11709 }, { "epoch": 1.2019704433497538, "grad_norm": 0.06567800790071487, "learning_rate": 0.01, "loss": 2.063, "step": 11712 }, { "epoch": 1.2022783251231528, "grad_norm": 0.06639432907104492, "learning_rate": 0.01, "loss": 2.078, "step": 11715 }, { "epoch": 1.2025862068965516, "grad_norm": 0.05059380456805229, "learning_rate": 0.01, "loss": 2.1163, "step": 11718 }, { "epoch": 1.2028940886699506, "grad_norm": 0.04076917842030525, "learning_rate": 0.01, "loss": 2.0784, "step": 11721 }, { "epoch": 1.2032019704433496, "grad_norm": 0.05994633212685585, "learning_rate": 0.01, "loss": 2.0819, "step": 11724 }, { "epoch": 1.2035098522167487, "grad_norm": 0.05682201310992241, "learning_rate": 0.01, "loss": 2.0625, "step": 11727 }, { "epoch": 1.2038177339901477, "grad_norm": 0.05393010750412941, "learning_rate": 0.01, "loss": 2.072, "step": 11730 }, { "epoch": 1.2041256157635467, "grad_norm": 0.04697128012776375, "learning_rate": 0.01, "loss": 2.0796, "step": 11733 }, { "epoch": 1.2044334975369457, "grad_norm": 0.04945002868771553, "learning_rate": 0.01, "loss": 2.0666, "step": 11736 }, { "epoch": 1.2047413793103448, "grad_norm": 0.06519649177789688, "learning_rate": 0.01, "loss": 2.0873, "step": 11739 }, { "epoch": 1.2050492610837438, "grad_norm": 0.1188720241189003, "learning_rate": 0.01, "loss": 2.0967, "step": 11742 }, { "epoch": 1.2053571428571428, "grad_norm": 0.1045864149928093, "learning_rate": 0.01, "loss": 2.0834, "step": 11745 }, { "epoch": 1.2056650246305418, "grad_norm": 0.04561993479728699, "learning_rate": 0.01, "loss": 2.0872, "step": 11748 }, { "epoch": 1.2059729064039408, "grad_norm": 0.04972228407859802, "learning_rate": 0.01, "loss": 2.0599, "step": 11751 }, { "epoch": 1.2062807881773399, "grad_norm": 0.05342618376016617, "learning_rate": 0.01, "loss": 2.0606, "step": 11754 }, { "epoch": 1.2065886699507389, "grad_norm": 0.05637587606906891, "learning_rate": 0.01, "loss": 2.0836, "step": 11757 }, { "epoch": 1.206896551724138, "grad_norm": 0.11595457047224045, "learning_rate": 0.01, "loss": 2.0904, "step": 11760 }, { "epoch": 1.207204433497537, "grad_norm": 0.11803465336561203, "learning_rate": 0.01, "loss": 2.0741, "step": 11763 }, { "epoch": 1.207512315270936, "grad_norm": 0.045427508652210236, "learning_rate": 0.01, "loss": 2.0721, "step": 11766 }, { "epoch": 1.207820197044335, "grad_norm": 0.0365883894264698, "learning_rate": 0.01, "loss": 2.0599, "step": 11769 }, { "epoch": 1.208128078817734, "grad_norm": 0.03729262575507164, "learning_rate": 0.01, "loss": 2.087, "step": 11772 }, { "epoch": 1.208435960591133, "grad_norm": 0.05842882767319679, "learning_rate": 0.01, "loss": 2.0762, "step": 11775 }, { "epoch": 1.208743842364532, "grad_norm": 0.07687997072935104, "learning_rate": 0.01, "loss": 2.0691, "step": 11778 }, { "epoch": 1.209051724137931, "grad_norm": 0.06832735985517502, "learning_rate": 0.01, "loss": 2.0974, "step": 11781 }, { "epoch": 1.20935960591133, "grad_norm": 0.10200455039739609, "learning_rate": 0.01, "loss": 2.0866, "step": 11784 }, { "epoch": 1.209667487684729, "grad_norm": 0.10769661515951157, "learning_rate": 0.01, "loss": 2.0761, "step": 11787 }, { "epoch": 1.2099753694581281, "grad_norm": 0.12233126908540726, "learning_rate": 0.01, "loss": 2.0911, "step": 11790 }, { "epoch": 1.2102832512315271, "grad_norm": 0.046646762639284134, "learning_rate": 0.01, "loss": 2.0546, "step": 11793 }, { "epoch": 1.2105911330049262, "grad_norm": 0.030627859756350517, "learning_rate": 0.01, "loss": 2.0805, "step": 11796 }, { "epoch": 1.2108990147783252, "grad_norm": 0.03977693244814873, "learning_rate": 0.01, "loss": 2.0465, "step": 11799 }, { "epoch": 1.2112068965517242, "grad_norm": 0.06213162839412689, "learning_rate": 0.01, "loss": 2.1123, "step": 11802 }, { "epoch": 1.2115147783251232, "grad_norm": 0.04708317294716835, "learning_rate": 0.01, "loss": 2.1315, "step": 11805 }, { "epoch": 1.2118226600985222, "grad_norm": 0.10807790607213974, "learning_rate": 0.01, "loss": 2.0825, "step": 11808 }, { "epoch": 1.2121305418719213, "grad_norm": 0.08579280972480774, "learning_rate": 0.01, "loss": 2.1366, "step": 11811 }, { "epoch": 1.2124384236453203, "grad_norm": 0.05783751606941223, "learning_rate": 0.01, "loss": 2.0682, "step": 11814 }, { "epoch": 1.2127463054187193, "grad_norm": 0.04836808145046234, "learning_rate": 0.01, "loss": 2.0606, "step": 11817 }, { "epoch": 1.2130541871921183, "grad_norm": 0.04026191681623459, "learning_rate": 0.01, "loss": 2.0637, "step": 11820 }, { "epoch": 1.2133620689655173, "grad_norm": 0.03309273347258568, "learning_rate": 0.01, "loss": 2.08, "step": 11823 }, { "epoch": 1.2136699507389164, "grad_norm": 0.10611164569854736, "learning_rate": 0.01, "loss": 2.0787, "step": 11826 }, { "epoch": 1.2139778325123154, "grad_norm": 0.0817422866821289, "learning_rate": 0.01, "loss": 2.069, "step": 11829 }, { "epoch": 1.2142857142857142, "grad_norm": 0.05528125911951065, "learning_rate": 0.01, "loss": 2.0408, "step": 11832 }, { "epoch": 1.2145935960591132, "grad_norm": 0.05016999691724777, "learning_rate": 0.01, "loss": 2.0794, "step": 11835 }, { "epoch": 1.2149014778325122, "grad_norm": 0.06376414746046066, "learning_rate": 0.01, "loss": 2.1097, "step": 11838 }, { "epoch": 1.2152093596059113, "grad_norm": 0.06668904423713684, "learning_rate": 0.01, "loss": 2.0745, "step": 11841 }, { "epoch": 1.2155172413793103, "grad_norm": 0.046260587871074677, "learning_rate": 0.01, "loss": 2.0711, "step": 11844 }, { "epoch": 1.2158251231527093, "grad_norm": 0.039374321699142456, "learning_rate": 0.01, "loss": 2.087, "step": 11847 }, { "epoch": 1.2161330049261083, "grad_norm": 0.10993562638759613, "learning_rate": 0.01, "loss": 2.0615, "step": 11850 }, { "epoch": 1.2164408866995073, "grad_norm": 0.03676668182015419, "learning_rate": 0.01, "loss": 2.0717, "step": 11853 }, { "epoch": 1.2167487684729064, "grad_norm": 0.10777715593576431, "learning_rate": 0.01, "loss": 2.1016, "step": 11856 }, { "epoch": 1.2170566502463054, "grad_norm": 0.07948705554008484, "learning_rate": 0.01, "loss": 2.083, "step": 11859 }, { "epoch": 1.2173645320197044, "grad_norm": 0.11646637320518494, "learning_rate": 0.01, "loss": 2.0552, "step": 11862 }, { "epoch": 1.2176724137931034, "grad_norm": 0.07525186985731125, "learning_rate": 0.01, "loss": 2.0877, "step": 11865 }, { "epoch": 1.2179802955665024, "grad_norm": 0.048124101012945175, "learning_rate": 0.01, "loss": 2.0652, "step": 11868 }, { "epoch": 1.2182881773399015, "grad_norm": 0.04603361710906029, "learning_rate": 0.01, "loss": 2.0922, "step": 11871 }, { "epoch": 1.2185960591133005, "grad_norm": 0.07067687064409256, "learning_rate": 0.01, "loss": 2.0946, "step": 11874 }, { "epoch": 1.2189039408866995, "grad_norm": 0.0959327220916748, "learning_rate": 0.01, "loss": 2.096, "step": 11877 }, { "epoch": 1.2192118226600985, "grad_norm": 0.08565320819616318, "learning_rate": 0.01, "loss": 2.09, "step": 11880 }, { "epoch": 1.2195197044334976, "grad_norm": 0.06728377193212509, "learning_rate": 0.01, "loss": 2.0801, "step": 11883 }, { "epoch": 1.2198275862068966, "grad_norm": 0.03809618949890137, "learning_rate": 0.01, "loss": 2.0668, "step": 11886 }, { "epoch": 1.2201354679802956, "grad_norm": 0.049925826489925385, "learning_rate": 0.01, "loss": 2.0625, "step": 11889 }, { "epoch": 1.2204433497536946, "grad_norm": 0.05949478596448898, "learning_rate": 0.01, "loss": 2.0687, "step": 11892 }, { "epoch": 1.2207512315270936, "grad_norm": 0.08161807060241699, "learning_rate": 0.01, "loss": 2.0789, "step": 11895 }, { "epoch": 1.2210591133004927, "grad_norm": 0.05829952284693718, "learning_rate": 0.01, "loss": 2.0846, "step": 11898 }, { "epoch": 1.2213669950738917, "grad_norm": 0.05801619216799736, "learning_rate": 0.01, "loss": 2.1014, "step": 11901 }, { "epoch": 1.2216748768472907, "grad_norm": 0.04123099148273468, "learning_rate": 0.01, "loss": 2.0867, "step": 11904 }, { "epoch": 1.2219827586206897, "grad_norm": 0.05088057741522789, "learning_rate": 0.01, "loss": 2.0751, "step": 11907 }, { "epoch": 1.2222906403940887, "grad_norm": 0.07357197254896164, "learning_rate": 0.01, "loss": 2.1023, "step": 11910 }, { "epoch": 1.2225985221674878, "grad_norm": 0.060078103095293045, "learning_rate": 0.01, "loss": 2.0784, "step": 11913 }, { "epoch": 1.2229064039408868, "grad_norm": 0.12629617750644684, "learning_rate": 0.01, "loss": 2.0767, "step": 11916 }, { "epoch": 1.2232142857142858, "grad_norm": 0.07202067971229553, "learning_rate": 0.01, "loss": 2.0796, "step": 11919 }, { "epoch": 1.2235221674876846, "grad_norm": 0.06407934427261353, "learning_rate": 0.01, "loss": 2.0926, "step": 11922 }, { "epoch": 1.2238300492610836, "grad_norm": 0.053789231926202774, "learning_rate": 0.01, "loss": 2.092, "step": 11925 }, { "epoch": 1.2241379310344827, "grad_norm": 0.04130502790212631, "learning_rate": 0.01, "loss": 2.051, "step": 11928 }, { "epoch": 1.2244458128078817, "grad_norm": 0.05235166475176811, "learning_rate": 0.01, "loss": 2.0829, "step": 11931 }, { "epoch": 1.2247536945812807, "grad_norm": 0.04508119449019432, "learning_rate": 0.01, "loss": 2.0857, "step": 11934 }, { "epoch": 1.2250615763546797, "grad_norm": 0.03570512309670448, "learning_rate": 0.01, "loss": 2.0734, "step": 11937 }, { "epoch": 1.2253694581280787, "grad_norm": 0.04690218344330788, "learning_rate": 0.01, "loss": 2.0988, "step": 11940 }, { "epoch": 1.2256773399014778, "grad_norm": 0.10231764614582062, "learning_rate": 0.01, "loss": 2.0716, "step": 11943 }, { "epoch": 1.2259852216748768, "grad_norm": 0.05221893638372421, "learning_rate": 0.01, "loss": 2.0693, "step": 11946 }, { "epoch": 1.2262931034482758, "grad_norm": 0.0647406056523323, "learning_rate": 0.01, "loss": 2.0809, "step": 11949 }, { "epoch": 1.2266009852216748, "grad_norm": 0.06388009339570999, "learning_rate": 0.01, "loss": 2.0968, "step": 11952 }, { "epoch": 1.2269088669950738, "grad_norm": 0.06904192268848419, "learning_rate": 0.01, "loss": 2.0917, "step": 11955 }, { "epoch": 1.2272167487684729, "grad_norm": 0.08780385553836823, "learning_rate": 0.01, "loss": 2.0881, "step": 11958 }, { "epoch": 1.2275246305418719, "grad_norm": 0.037958092987537384, "learning_rate": 0.01, "loss": 2.0813, "step": 11961 }, { "epoch": 1.227832512315271, "grad_norm": 0.04035305231809616, "learning_rate": 0.01, "loss": 2.0718, "step": 11964 }, { "epoch": 1.22814039408867, "grad_norm": 0.056451354175806046, "learning_rate": 0.01, "loss": 2.0653, "step": 11967 }, { "epoch": 1.228448275862069, "grad_norm": 0.06248374283313751, "learning_rate": 0.01, "loss": 2.0794, "step": 11970 }, { "epoch": 1.228756157635468, "grad_norm": 0.05662978067994118, "learning_rate": 0.01, "loss": 2.0873, "step": 11973 }, { "epoch": 1.229064039408867, "grad_norm": 0.06416438519954681, "learning_rate": 0.01, "loss": 2.0933, "step": 11976 }, { "epoch": 1.229371921182266, "grad_norm": 0.04529969021677971, "learning_rate": 0.01, "loss": 2.0892, "step": 11979 }, { "epoch": 1.229679802955665, "grad_norm": 0.03636370226740837, "learning_rate": 0.01, "loss": 2.0968, "step": 11982 }, { "epoch": 1.229987684729064, "grad_norm": 0.03992651402950287, "learning_rate": 0.01, "loss": 2.0606, "step": 11985 }, { "epoch": 1.230295566502463, "grad_norm": 0.19436125457286835, "learning_rate": 0.01, "loss": 2.0779, "step": 11988 }, { "epoch": 1.230603448275862, "grad_norm": 0.15459048748016357, "learning_rate": 0.01, "loss": 2.0801, "step": 11991 }, { "epoch": 1.2309113300492611, "grad_norm": 0.11131371557712555, "learning_rate": 0.01, "loss": 2.0629, "step": 11994 }, { "epoch": 1.2312192118226601, "grad_norm": 0.06876586377620697, "learning_rate": 0.01, "loss": 2.1081, "step": 11997 }, { "epoch": 1.2315270935960592, "grad_norm": 0.03379599004983902, "learning_rate": 0.01, "loss": 2.0977, "step": 12000 }, { "epoch": 1.2318349753694582, "grad_norm": 0.06905510276556015, "learning_rate": 0.01, "loss": 2.0545, "step": 12003 }, { "epoch": 1.2321428571428572, "grad_norm": 0.05859539657831192, "learning_rate": 0.01, "loss": 2.0751, "step": 12006 }, { "epoch": 1.2324507389162562, "grad_norm": 0.1200842559337616, "learning_rate": 0.01, "loss": 2.1045, "step": 12009 }, { "epoch": 1.2327586206896552, "grad_norm": 0.09969060868024826, "learning_rate": 0.01, "loss": 2.0641, "step": 12012 }, { "epoch": 1.2330665024630543, "grad_norm": 0.08915867656469345, "learning_rate": 0.01, "loss": 2.0585, "step": 12015 }, { "epoch": 1.2333743842364533, "grad_norm": 0.10951671004295349, "learning_rate": 0.01, "loss": 2.1007, "step": 12018 }, { "epoch": 1.2336822660098523, "grad_norm": 0.15262556076049805, "learning_rate": 0.01, "loss": 2.0643, "step": 12021 }, { "epoch": 1.2339901477832513, "grad_norm": 0.05622226372361183, "learning_rate": 0.01, "loss": 2.0796, "step": 12024 }, { "epoch": 1.2342980295566504, "grad_norm": 0.05918841436505318, "learning_rate": 0.01, "loss": 2.0911, "step": 12027 }, { "epoch": 1.2346059113300494, "grad_norm": 0.04867622256278992, "learning_rate": 0.01, "loss": 2.0872, "step": 12030 }, { "epoch": 1.2349137931034484, "grad_norm": 0.04389597848057747, "learning_rate": 0.01, "loss": 2.0882, "step": 12033 }, { "epoch": 1.2352216748768472, "grad_norm": 0.1209108904004097, "learning_rate": 0.01, "loss": 2.0394, "step": 12036 }, { "epoch": 1.2355295566502462, "grad_norm": 0.08446931838989258, "learning_rate": 0.01, "loss": 2.0744, "step": 12039 }, { "epoch": 1.2358374384236452, "grad_norm": 0.07141686230897903, "learning_rate": 0.01, "loss": 2.1099, "step": 12042 }, { "epoch": 1.2361453201970443, "grad_norm": 0.1216801181435585, "learning_rate": 0.01, "loss": 2.0754, "step": 12045 }, { "epoch": 1.2364532019704433, "grad_norm": 0.10539086163043976, "learning_rate": 0.01, "loss": 2.0887, "step": 12048 }, { "epoch": 1.2367610837438423, "grad_norm": 0.09336747229099274, "learning_rate": 0.01, "loss": 2.0637, "step": 12051 }, { "epoch": 1.2370689655172413, "grad_norm": 0.091059610247612, "learning_rate": 0.01, "loss": 2.057, "step": 12054 }, { "epoch": 1.2373768472906403, "grad_norm": 0.08291159570217133, "learning_rate": 0.01, "loss": 2.1039, "step": 12057 }, { "epoch": 1.2376847290640394, "grad_norm": 0.07626821845769882, "learning_rate": 0.01, "loss": 2.0784, "step": 12060 }, { "epoch": 1.2379926108374384, "grad_norm": 0.05197496339678764, "learning_rate": 0.01, "loss": 2.0968, "step": 12063 }, { "epoch": 1.2383004926108374, "grad_norm": 0.061275266110897064, "learning_rate": 0.01, "loss": 2.095, "step": 12066 }, { "epoch": 1.2386083743842364, "grad_norm": 0.04282483085989952, "learning_rate": 0.01, "loss": 2.083, "step": 12069 }, { "epoch": 1.2389162561576355, "grad_norm": 0.037066422402858734, "learning_rate": 0.01, "loss": 2.0653, "step": 12072 }, { "epoch": 1.2392241379310345, "grad_norm": 0.0467105396091938, "learning_rate": 0.01, "loss": 2.0913, "step": 12075 }, { "epoch": 1.2395320197044335, "grad_norm": 0.053995974361896515, "learning_rate": 0.01, "loss": 2.0754, "step": 12078 }, { "epoch": 1.2398399014778325, "grad_norm": 0.08583737164735794, "learning_rate": 0.01, "loss": 2.089, "step": 12081 }, { "epoch": 1.2401477832512315, "grad_norm": 0.07264076173305511, "learning_rate": 0.01, "loss": 2.07, "step": 12084 }, { "epoch": 1.2404556650246306, "grad_norm": 0.062001802027225494, "learning_rate": 0.01, "loss": 2.0969, "step": 12087 }, { "epoch": 1.2407635467980296, "grad_norm": 0.05311381444334984, "learning_rate": 0.01, "loss": 2.0833, "step": 12090 }, { "epoch": 1.2410714285714286, "grad_norm": 0.04272656887769699, "learning_rate": 0.01, "loss": 2.0883, "step": 12093 }, { "epoch": 1.2413793103448276, "grad_norm": 0.10696172714233398, "learning_rate": 0.01, "loss": 2.0925, "step": 12096 }, { "epoch": 1.2416871921182266, "grad_norm": 0.08625493943691254, "learning_rate": 0.01, "loss": 2.0616, "step": 12099 }, { "epoch": 1.2419950738916257, "grad_norm": 0.06818173080682755, "learning_rate": 0.01, "loss": 2.0943, "step": 12102 }, { "epoch": 1.2423029556650247, "grad_norm": 0.050731681287288666, "learning_rate": 0.01, "loss": 2.0802, "step": 12105 }, { "epoch": 1.2426108374384237, "grad_norm": 0.08426883816719055, "learning_rate": 0.01, "loss": 2.0825, "step": 12108 }, { "epoch": 1.2429187192118227, "grad_norm": 0.09432832151651382, "learning_rate": 0.01, "loss": 2.0834, "step": 12111 }, { "epoch": 1.2432266009852218, "grad_norm": 0.06951441615819931, "learning_rate": 0.01, "loss": 2.0951, "step": 12114 }, { "epoch": 1.2435344827586208, "grad_norm": 0.06427393108606339, "learning_rate": 0.01, "loss": 2.0706, "step": 12117 }, { "epoch": 1.2438423645320198, "grad_norm": 0.03967609629034996, "learning_rate": 0.01, "loss": 2.0897, "step": 12120 }, { "epoch": 1.2441502463054186, "grad_norm": 0.036665160208940506, "learning_rate": 0.01, "loss": 2.0791, "step": 12123 }, { "epoch": 1.2444581280788176, "grad_norm": 0.072290800511837, "learning_rate": 0.01, "loss": 2.0889, "step": 12126 }, { "epoch": 1.2447660098522166, "grad_norm": 0.07136868685483932, "learning_rate": 0.01, "loss": 2.0688, "step": 12129 }, { "epoch": 1.2450738916256157, "grad_norm": 0.15400493144989014, "learning_rate": 0.01, "loss": 2.0821, "step": 12132 }, { "epoch": 1.2453817733990147, "grad_norm": 0.07114578038454056, "learning_rate": 0.01, "loss": 2.0719, "step": 12135 }, { "epoch": 1.2456896551724137, "grad_norm": 0.043961767107248306, "learning_rate": 0.01, "loss": 2.0877, "step": 12138 }, { "epoch": 1.2459975369458127, "grad_norm": 0.056267060339450836, "learning_rate": 0.01, "loss": 2.0688, "step": 12141 }, { "epoch": 1.2463054187192117, "grad_norm": 0.035889722406864166, "learning_rate": 0.01, "loss": 2.0783, "step": 12144 }, { "epoch": 1.2466133004926108, "grad_norm": 0.1781640499830246, "learning_rate": 0.01, "loss": 2.0574, "step": 12147 }, { "epoch": 1.2469211822660098, "grad_norm": 0.0891503393650055, "learning_rate": 0.01, "loss": 2.0957, "step": 12150 }, { "epoch": 1.2472290640394088, "grad_norm": 0.047431472688913345, "learning_rate": 0.01, "loss": 2.0696, "step": 12153 }, { "epoch": 1.2475369458128078, "grad_norm": 0.04693286865949631, "learning_rate": 0.01, "loss": 2.0438, "step": 12156 }, { "epoch": 1.2478448275862069, "grad_norm": 0.0382777564227581, "learning_rate": 0.01, "loss": 2.0851, "step": 12159 }, { "epoch": 1.2481527093596059, "grad_norm": 0.04085429012775421, "learning_rate": 0.01, "loss": 2.0846, "step": 12162 }, { "epoch": 1.248460591133005, "grad_norm": 0.05329781025648117, "learning_rate": 0.01, "loss": 2.0732, "step": 12165 }, { "epoch": 1.248768472906404, "grad_norm": 0.06961992383003235, "learning_rate": 0.01, "loss": 2.0372, "step": 12168 }, { "epoch": 1.249076354679803, "grad_norm": 0.05290938913822174, "learning_rate": 0.01, "loss": 2.0692, "step": 12171 }, { "epoch": 1.249384236453202, "grad_norm": 0.1247674971818924, "learning_rate": 0.01, "loss": 2.0679, "step": 12174 }, { "epoch": 1.249692118226601, "grad_norm": 0.04983863607048988, "learning_rate": 0.01, "loss": 2.0611, "step": 12177 }, { "epoch": 1.25, "grad_norm": 0.08552074432373047, "learning_rate": 0.01, "loss": 2.1084, "step": 12180 }, { "epoch": 1.250307881773399, "grad_norm": 0.1376069337129593, "learning_rate": 0.01, "loss": 2.0997, "step": 12183 }, { "epoch": 1.250615763546798, "grad_norm": 0.07097752392292023, "learning_rate": 0.01, "loss": 2.0761, "step": 12186 }, { "epoch": 1.250923645320197, "grad_norm": 0.03953644260764122, "learning_rate": 0.01, "loss": 2.0675, "step": 12189 }, { "epoch": 1.251231527093596, "grad_norm": 0.04611526057124138, "learning_rate": 0.01, "loss": 2.0717, "step": 12192 }, { "epoch": 1.251539408866995, "grad_norm": 0.07573895156383514, "learning_rate": 0.01, "loss": 2.0785, "step": 12195 }, { "epoch": 1.2518472906403941, "grad_norm": 0.07144660502672195, "learning_rate": 0.01, "loss": 2.0815, "step": 12198 }, { "epoch": 1.2521551724137931, "grad_norm": 0.05297645181417465, "learning_rate": 0.01, "loss": 2.093, "step": 12201 }, { "epoch": 1.2524630541871922, "grad_norm": 0.044887710362672806, "learning_rate": 0.01, "loss": 2.0752, "step": 12204 }, { "epoch": 1.2527709359605912, "grad_norm": 0.04305564984679222, "learning_rate": 0.01, "loss": 2.0596, "step": 12207 }, { "epoch": 1.2530788177339902, "grad_norm": 0.057785287499427795, "learning_rate": 0.01, "loss": 2.094, "step": 12210 }, { "epoch": 1.2533866995073892, "grad_norm": 0.04404570534825325, "learning_rate": 0.01, "loss": 2.1037, "step": 12213 }, { "epoch": 1.2536945812807883, "grad_norm": 0.055468104779720306, "learning_rate": 0.01, "loss": 2.0851, "step": 12216 }, { "epoch": 1.2540024630541873, "grad_norm": 0.17121906578540802, "learning_rate": 0.01, "loss": 2.081, "step": 12219 }, { "epoch": 1.2543103448275863, "grad_norm": 0.09411416202783585, "learning_rate": 0.01, "loss": 2.0974, "step": 12222 }, { "epoch": 1.2546182266009853, "grad_norm": 0.07855021953582764, "learning_rate": 0.01, "loss": 2.0733, "step": 12225 }, { "epoch": 1.2549261083743843, "grad_norm": 0.052616432309150696, "learning_rate": 0.01, "loss": 2.0372, "step": 12228 }, { "epoch": 1.2552339901477834, "grad_norm": 0.047992121428251266, "learning_rate": 0.01, "loss": 2.0918, "step": 12231 }, { "epoch": 1.2555418719211824, "grad_norm": 0.04336715489625931, "learning_rate": 0.01, "loss": 2.0511, "step": 12234 }, { "epoch": 1.2558497536945814, "grad_norm": 0.03128316253423691, "learning_rate": 0.01, "loss": 2.0882, "step": 12237 }, { "epoch": 1.2561576354679804, "grad_norm": 0.06315557658672333, "learning_rate": 0.01, "loss": 2.0918, "step": 12240 }, { "epoch": 1.2564655172413794, "grad_norm": 0.0528687946498394, "learning_rate": 0.01, "loss": 2.0556, "step": 12243 }, { "epoch": 1.2567733990147782, "grad_norm": 0.17166706919670105, "learning_rate": 0.01, "loss": 2.068, "step": 12246 }, { "epoch": 1.2570812807881773, "grad_norm": 0.11394128203392029, "learning_rate": 0.01, "loss": 2.1179, "step": 12249 }, { "epoch": 1.2573891625615763, "grad_norm": 0.08554805815219879, "learning_rate": 0.01, "loss": 2.0857, "step": 12252 }, { "epoch": 1.2576970443349753, "grad_norm": 0.05203767865896225, "learning_rate": 0.01, "loss": 2.0975, "step": 12255 }, { "epoch": 1.2580049261083743, "grad_norm": 0.06072428077459335, "learning_rate": 0.01, "loss": 2.0692, "step": 12258 }, { "epoch": 1.2583128078817734, "grad_norm": 0.044136617332696915, "learning_rate": 0.01, "loss": 2.0458, "step": 12261 }, { "epoch": 1.2586206896551724, "grad_norm": 0.038774993270635605, "learning_rate": 0.01, "loss": 2.0835, "step": 12264 }, { "epoch": 1.2589285714285714, "grad_norm": 0.03669529780745506, "learning_rate": 0.01, "loss": 2.0949, "step": 12267 }, { "epoch": 1.2592364532019704, "grad_norm": 0.050722066313028336, "learning_rate": 0.01, "loss": 2.0772, "step": 12270 }, { "epoch": 1.2595443349753694, "grad_norm": 0.12684300541877747, "learning_rate": 0.01, "loss": 2.0814, "step": 12273 }, { "epoch": 1.2598522167487685, "grad_norm": 0.07431039214134216, "learning_rate": 0.01, "loss": 2.1001, "step": 12276 }, { "epoch": 1.2601600985221675, "grad_norm": 0.07050034403800964, "learning_rate": 0.01, "loss": 2.0507, "step": 12279 }, { "epoch": 1.2604679802955665, "grad_norm": 0.0553620308637619, "learning_rate": 0.01, "loss": 2.0757, "step": 12282 }, { "epoch": 1.2607758620689655, "grad_norm": 0.10946903377771378, "learning_rate": 0.01, "loss": 2.0659, "step": 12285 }, { "epoch": 1.2610837438423645, "grad_norm": 0.06346802413463593, "learning_rate": 0.01, "loss": 2.0871, "step": 12288 }, { "epoch": 1.2613916256157636, "grad_norm": 0.09386668354272842, "learning_rate": 0.01, "loss": 2.0702, "step": 12291 }, { "epoch": 1.2616995073891626, "grad_norm": 0.05672946944832802, "learning_rate": 0.01, "loss": 2.0812, "step": 12294 }, { "epoch": 1.2620073891625616, "grad_norm": 0.1079539805650711, "learning_rate": 0.01, "loss": 2.0851, "step": 12297 }, { "epoch": 1.2623152709359606, "grad_norm": 0.043078985065221786, "learning_rate": 0.01, "loss": 2.0846, "step": 12300 }, { "epoch": 1.2626231527093597, "grad_norm": 0.043098706752061844, "learning_rate": 0.01, "loss": 2.0541, "step": 12303 }, { "epoch": 1.2629310344827587, "grad_norm": 0.04908977448940277, "learning_rate": 0.01, "loss": 2.0801, "step": 12306 }, { "epoch": 1.2632389162561577, "grad_norm": 0.09897246211767197, "learning_rate": 0.01, "loss": 2.0899, "step": 12309 }, { "epoch": 1.2635467980295567, "grad_norm": 0.0861278846859932, "learning_rate": 0.01, "loss": 2.0613, "step": 12312 }, { "epoch": 1.2638546798029557, "grad_norm": 0.06973280757665634, "learning_rate": 0.01, "loss": 2.0598, "step": 12315 }, { "epoch": 1.2641625615763548, "grad_norm": 0.048658501356840134, "learning_rate": 0.01, "loss": 2.0838, "step": 12318 }, { "epoch": 1.2644704433497536, "grad_norm": 0.05053295940160751, "learning_rate": 0.01, "loss": 2.0665, "step": 12321 }, { "epoch": 1.2647783251231526, "grad_norm": 0.0536380410194397, "learning_rate": 0.01, "loss": 2.0696, "step": 12324 }, { "epoch": 1.2650862068965516, "grad_norm": 0.13452892005443573, "learning_rate": 0.01, "loss": 2.0928, "step": 12327 }, { "epoch": 1.2653940886699506, "grad_norm": 0.056635159999132156, "learning_rate": 0.01, "loss": 2.0903, "step": 12330 }, { "epoch": 1.2657019704433496, "grad_norm": 0.09460306912660599, "learning_rate": 0.01, "loss": 2.0616, "step": 12333 }, { "epoch": 1.2660098522167487, "grad_norm": 0.09019794315099716, "learning_rate": 0.01, "loss": 2.0808, "step": 12336 }, { "epoch": 1.2663177339901477, "grad_norm": 0.04020017758011818, "learning_rate": 0.01, "loss": 2.0613, "step": 12339 }, { "epoch": 1.2666256157635467, "grad_norm": 0.05063892900943756, "learning_rate": 0.01, "loss": 2.0846, "step": 12342 }, { "epoch": 1.2669334975369457, "grad_norm": 0.06472054123878479, "learning_rate": 0.01, "loss": 2.0884, "step": 12345 }, { "epoch": 1.2672413793103448, "grad_norm": 0.0523315854370594, "learning_rate": 0.01, "loss": 2.0756, "step": 12348 }, { "epoch": 1.2675492610837438, "grad_norm": 0.040240950882434845, "learning_rate": 0.01, "loss": 2.0646, "step": 12351 }, { "epoch": 1.2678571428571428, "grad_norm": 0.061988551169633865, "learning_rate": 0.01, "loss": 2.0899, "step": 12354 }, { "epoch": 1.2681650246305418, "grad_norm": 0.03831657022237778, "learning_rate": 0.01, "loss": 2.0633, "step": 12357 }, { "epoch": 1.2684729064039408, "grad_norm": 0.105617955327034, "learning_rate": 0.01, "loss": 2.0553, "step": 12360 }, { "epoch": 1.2687807881773399, "grad_norm": 0.09372366219758987, "learning_rate": 0.01, "loss": 2.0522, "step": 12363 }, { "epoch": 1.2690886699507389, "grad_norm": 0.10305638611316681, "learning_rate": 0.01, "loss": 2.0543, "step": 12366 }, { "epoch": 1.269396551724138, "grad_norm": 0.07187418639659882, "learning_rate": 0.01, "loss": 2.0775, "step": 12369 }, { "epoch": 1.269704433497537, "grad_norm": 0.03744306415319443, "learning_rate": 0.01, "loss": 2.0771, "step": 12372 }, { "epoch": 1.270012315270936, "grad_norm": 0.03059488907456398, "learning_rate": 0.01, "loss": 2.067, "step": 12375 }, { "epoch": 1.270320197044335, "grad_norm": 0.09767211973667145, "learning_rate": 0.01, "loss": 2.0889, "step": 12378 }, { "epoch": 1.270628078817734, "grad_norm": 0.05093003436923027, "learning_rate": 0.01, "loss": 2.0852, "step": 12381 }, { "epoch": 1.270935960591133, "grad_norm": 0.04648155719041824, "learning_rate": 0.01, "loss": 2.0775, "step": 12384 }, { "epoch": 1.271243842364532, "grad_norm": 0.04745417460799217, "learning_rate": 0.01, "loss": 2.0888, "step": 12387 }, { "epoch": 1.271551724137931, "grad_norm": 0.05171092227101326, "learning_rate": 0.01, "loss": 2.075, "step": 12390 }, { "epoch": 1.27185960591133, "grad_norm": 0.05051850154995918, "learning_rate": 0.01, "loss": 2.0754, "step": 12393 }, { "epoch": 1.272167487684729, "grad_norm": 0.06048591807484627, "learning_rate": 0.01, "loss": 2.1025, "step": 12396 }, { "epoch": 1.2724753694581281, "grad_norm": 0.09673628211021423, "learning_rate": 0.01, "loss": 2.0866, "step": 12399 }, { "epoch": 1.2727832512315271, "grad_norm": 0.11826295405626297, "learning_rate": 0.01, "loss": 2.0757, "step": 12402 }, { "epoch": 1.2730911330049262, "grad_norm": 0.06271769106388092, "learning_rate": 0.01, "loss": 2.0797, "step": 12405 }, { "epoch": 1.2733990147783252, "grad_norm": 0.0463738851249218, "learning_rate": 0.01, "loss": 2.054, "step": 12408 }, { "epoch": 1.2737068965517242, "grad_norm": 0.035649579018354416, "learning_rate": 0.01, "loss": 2.0747, "step": 12411 }, { "epoch": 1.2740147783251232, "grad_norm": 0.08586122840642929, "learning_rate": 0.01, "loss": 2.0692, "step": 12414 }, { "epoch": 1.2743226600985222, "grad_norm": 0.08538860082626343, "learning_rate": 0.01, "loss": 2.0617, "step": 12417 }, { "epoch": 1.2746305418719213, "grad_norm": 0.05974709242582321, "learning_rate": 0.01, "loss": 2.0527, "step": 12420 }, { "epoch": 1.2749384236453203, "grad_norm": 0.057023100554943085, "learning_rate": 0.01, "loss": 2.0751, "step": 12423 }, { "epoch": 1.2752463054187193, "grad_norm": 0.062105972319841385, "learning_rate": 0.01, "loss": 2.0946, "step": 12426 }, { "epoch": 1.2755541871921183, "grad_norm": 0.048221688717603683, "learning_rate": 0.01, "loss": 2.0662, "step": 12429 }, { "epoch": 1.2758620689655173, "grad_norm": 0.04440610110759735, "learning_rate": 0.01, "loss": 2.0852, "step": 12432 }, { "epoch": 1.2761699507389164, "grad_norm": 0.06074054539203644, "learning_rate": 0.01, "loss": 2.0667, "step": 12435 }, { "epoch": 1.2764778325123154, "grad_norm": 0.09477720409631729, "learning_rate": 0.01, "loss": 2.0815, "step": 12438 }, { "epoch": 1.2767857142857144, "grad_norm": 0.08470610529184341, "learning_rate": 0.01, "loss": 2.1023, "step": 12441 }, { "epoch": 1.2770935960591134, "grad_norm": 0.09079881012439728, "learning_rate": 0.01, "loss": 2.0761, "step": 12444 }, { "epoch": 1.2774014778325122, "grad_norm": 0.06132930517196655, "learning_rate": 0.01, "loss": 2.1027, "step": 12447 }, { "epoch": 1.2777093596059113, "grad_norm": 0.04702606052160263, "learning_rate": 0.01, "loss": 2.1051, "step": 12450 }, { "epoch": 1.2780172413793103, "grad_norm": 0.06621818989515305, "learning_rate": 0.01, "loss": 2.0835, "step": 12453 }, { "epoch": 1.2783251231527093, "grad_norm": 0.047375794500112534, "learning_rate": 0.01, "loss": 2.0982, "step": 12456 }, { "epoch": 1.2786330049261083, "grad_norm": 0.03766035661101341, "learning_rate": 0.01, "loss": 2.0468, "step": 12459 }, { "epoch": 1.2789408866995073, "grad_norm": 0.10769324004650116, "learning_rate": 0.01, "loss": 2.0848, "step": 12462 }, { "epoch": 1.2792487684729064, "grad_norm": 0.06848851591348648, "learning_rate": 0.01, "loss": 2.0664, "step": 12465 }, { "epoch": 1.2795566502463054, "grad_norm": 0.09864836931228638, "learning_rate": 0.01, "loss": 2.0719, "step": 12468 }, { "epoch": 1.2798645320197044, "grad_norm": 0.04042387008666992, "learning_rate": 0.01, "loss": 2.0672, "step": 12471 }, { "epoch": 1.2801724137931034, "grad_norm": 0.09212526679039001, "learning_rate": 0.01, "loss": 2.0789, "step": 12474 }, { "epoch": 1.2804802955665024, "grad_norm": 0.08713985234498978, "learning_rate": 0.01, "loss": 2.0765, "step": 12477 }, { "epoch": 1.2807881773399015, "grad_norm": 0.04133505001664162, "learning_rate": 0.01, "loss": 2.0809, "step": 12480 }, { "epoch": 1.2810960591133005, "grad_norm": 0.07466418296098709, "learning_rate": 0.01, "loss": 2.0499, "step": 12483 }, { "epoch": 1.2814039408866995, "grad_norm": 0.08685484528541565, "learning_rate": 0.01, "loss": 2.0689, "step": 12486 }, { "epoch": 1.2817118226600985, "grad_norm": 0.13300663232803345, "learning_rate": 0.01, "loss": 2.0761, "step": 12489 }, { "epoch": 1.2820197044334976, "grad_norm": 0.1024186760187149, "learning_rate": 0.01, "loss": 2.0763, "step": 12492 }, { "epoch": 1.2823275862068966, "grad_norm": 0.05908042937517166, "learning_rate": 0.01, "loss": 2.0485, "step": 12495 }, { "epoch": 1.2826354679802956, "grad_norm": 0.03715427219867706, "learning_rate": 0.01, "loss": 2.0925, "step": 12498 }, { "epoch": 1.2829433497536946, "grad_norm": 0.05033004283905029, "learning_rate": 0.01, "loss": 2.0875, "step": 12501 }, { "epoch": 1.2832512315270936, "grad_norm": 0.04851456359028816, "learning_rate": 0.01, "loss": 2.0547, "step": 12504 }, { "epoch": 1.2835591133004927, "grad_norm": 0.08153282105922699, "learning_rate": 0.01, "loss": 2.0828, "step": 12507 }, { "epoch": 1.2838669950738917, "grad_norm": 0.07549238950014114, "learning_rate": 0.01, "loss": 2.0681, "step": 12510 }, { "epoch": 1.2841748768472907, "grad_norm": 0.08571973443031311, "learning_rate": 0.01, "loss": 2.0556, "step": 12513 }, { "epoch": 1.2844827586206897, "grad_norm": 0.1036754697561264, "learning_rate": 0.01, "loss": 2.0712, "step": 12516 }, { "epoch": 1.2847906403940887, "grad_norm": 0.04329349100589752, "learning_rate": 0.01, "loss": 2.0711, "step": 12519 }, { "epoch": 1.2850985221674878, "grad_norm": 0.03718428686261177, "learning_rate": 0.01, "loss": 2.0812, "step": 12522 }, { "epoch": 1.2854064039408866, "grad_norm": 0.07977878302335739, "learning_rate": 0.01, "loss": 2.0562, "step": 12525 }, { "epoch": 1.2857142857142856, "grad_norm": 0.11540202796459198, "learning_rate": 0.01, "loss": 2.0666, "step": 12528 }, { "epoch": 1.2860221674876846, "grad_norm": 0.07631656527519226, "learning_rate": 0.01, "loss": 2.0694, "step": 12531 }, { "epoch": 1.2863300492610836, "grad_norm": 0.051738426089286804, "learning_rate": 0.01, "loss": 2.0862, "step": 12534 }, { "epoch": 1.2866379310344827, "grad_norm": 0.05070396512746811, "learning_rate": 0.01, "loss": 2.0538, "step": 12537 }, { "epoch": 1.2869458128078817, "grad_norm": 0.039027947932481766, "learning_rate": 0.01, "loss": 2.0487, "step": 12540 }, { "epoch": 1.2872536945812807, "grad_norm": 0.052646003663539886, "learning_rate": 0.01, "loss": 2.0745, "step": 12543 }, { "epoch": 1.2875615763546797, "grad_norm": 0.06035429984331131, "learning_rate": 0.01, "loss": 2.0775, "step": 12546 }, { "epoch": 1.2878694581280787, "grad_norm": 0.0818098783493042, "learning_rate": 0.01, "loss": 2.0685, "step": 12549 }, { "epoch": 1.2881773399014778, "grad_norm": 0.030897030606865883, "learning_rate": 0.01, "loss": 2.0701, "step": 12552 }, { "epoch": 1.2884852216748768, "grad_norm": 0.042795561254024506, "learning_rate": 0.01, "loss": 2.0673, "step": 12555 }, { "epoch": 1.2887931034482758, "grad_norm": 0.11560031026601791, "learning_rate": 0.01, "loss": 2.0637, "step": 12558 }, { "epoch": 1.2891009852216748, "grad_norm": 0.03919963166117668, "learning_rate": 0.01, "loss": 2.0547, "step": 12561 }, { "epoch": 1.2894088669950738, "grad_norm": 0.03816407918930054, "learning_rate": 0.01, "loss": 2.0576, "step": 12564 }, { "epoch": 1.2897167487684729, "grad_norm": 0.04641805216670036, "learning_rate": 0.01, "loss": 2.0779, "step": 12567 }, { "epoch": 1.2900246305418719, "grad_norm": 0.16832102835178375, "learning_rate": 0.01, "loss": 2.0658, "step": 12570 }, { "epoch": 1.290332512315271, "grad_norm": 0.06651032716035843, "learning_rate": 0.01, "loss": 2.0602, "step": 12573 }, { "epoch": 1.29064039408867, "grad_norm": 0.05333925411105156, "learning_rate": 0.01, "loss": 2.0842, "step": 12576 }, { "epoch": 1.290948275862069, "grad_norm": 0.03173685073852539, "learning_rate": 0.01, "loss": 2.0834, "step": 12579 }, { "epoch": 1.291256157635468, "grad_norm": 0.0836583599448204, "learning_rate": 0.01, "loss": 2.0605, "step": 12582 }, { "epoch": 1.291564039408867, "grad_norm": 0.09913724660873413, "learning_rate": 0.01, "loss": 2.0577, "step": 12585 }, { "epoch": 1.291871921182266, "grad_norm": 0.04268624261021614, "learning_rate": 0.01, "loss": 2.1027, "step": 12588 }, { "epoch": 1.292179802955665, "grad_norm": 0.03744608163833618, "learning_rate": 0.01, "loss": 2.0506, "step": 12591 }, { "epoch": 1.292487684729064, "grad_norm": 0.11833969503641129, "learning_rate": 0.01, "loss": 2.0606, "step": 12594 }, { "epoch": 1.292795566502463, "grad_norm": 0.11814229190349579, "learning_rate": 0.01, "loss": 2.0641, "step": 12597 }, { "epoch": 1.293103448275862, "grad_norm": 0.04583375155925751, "learning_rate": 0.01, "loss": 2.0898, "step": 12600 }, { "epoch": 1.2934113300492611, "grad_norm": 0.11559072881937027, "learning_rate": 0.01, "loss": 2.0657, "step": 12603 }, { "epoch": 1.2937192118226601, "grad_norm": 0.050608474761247635, "learning_rate": 0.01, "loss": 2.0879, "step": 12606 }, { "epoch": 1.2940270935960592, "grad_norm": 0.04284593090415001, "learning_rate": 0.01, "loss": 2.0545, "step": 12609 }, { "epoch": 1.2943349753694582, "grad_norm": 0.042168114334344864, "learning_rate": 0.01, "loss": 2.0645, "step": 12612 }, { "epoch": 1.2946428571428572, "grad_norm": 0.06251826882362366, "learning_rate": 0.01, "loss": 2.0621, "step": 12615 }, { "epoch": 1.2949507389162562, "grad_norm": 0.11159554868936539, "learning_rate": 0.01, "loss": 2.0718, "step": 12618 }, { "epoch": 1.2952586206896552, "grad_norm": 0.0715017095208168, "learning_rate": 0.01, "loss": 2.0844, "step": 12621 }, { "epoch": 1.2955665024630543, "grad_norm": 0.12078917771577835, "learning_rate": 0.01, "loss": 2.0604, "step": 12624 }, { "epoch": 1.2958743842364533, "grad_norm": 0.11613093316555023, "learning_rate": 0.01, "loss": 2.0753, "step": 12627 }, { "epoch": 1.2961822660098523, "grad_norm": 0.053543299436569214, "learning_rate": 0.01, "loss": 2.0736, "step": 12630 }, { "epoch": 1.2964901477832513, "grad_norm": 0.05464399605989456, "learning_rate": 0.01, "loss": 2.0845, "step": 12633 }, { "epoch": 1.2967980295566504, "grad_norm": 0.0487944521009922, "learning_rate": 0.01, "loss": 2.0859, "step": 12636 }, { "epoch": 1.2971059113300494, "grad_norm": 0.05266605690121651, "learning_rate": 0.01, "loss": 2.0532, "step": 12639 }, { "epoch": 1.2974137931034484, "grad_norm": 0.07863074541091919, "learning_rate": 0.01, "loss": 2.0496, "step": 12642 }, { "epoch": 1.2977216748768474, "grad_norm": 0.05371072143316269, "learning_rate": 0.01, "loss": 2.1016, "step": 12645 }, { "epoch": 1.2980295566502464, "grad_norm": 0.05592924728989601, "learning_rate": 0.01, "loss": 2.0832, "step": 12648 }, { "epoch": 1.2983374384236452, "grad_norm": 0.0675397738814354, "learning_rate": 0.01, "loss": 2.0784, "step": 12651 }, { "epoch": 1.2986453201970443, "grad_norm": 0.04399113729596138, "learning_rate": 0.01, "loss": 2.0567, "step": 12654 }, { "epoch": 1.2989532019704433, "grad_norm": 0.04609301686286926, "learning_rate": 0.01, "loss": 2.0683, "step": 12657 }, { "epoch": 1.2992610837438423, "grad_norm": 0.05637912079691887, "learning_rate": 0.01, "loss": 2.0655, "step": 12660 }, { "epoch": 1.2995689655172413, "grad_norm": 0.1115126758813858, "learning_rate": 0.01, "loss": 2.0643, "step": 12663 }, { "epoch": 1.2998768472906403, "grad_norm": 0.07762522250413895, "learning_rate": 0.01, "loss": 2.0853, "step": 12666 }, { "epoch": 1.3001847290640394, "grad_norm": 0.06717406958341599, "learning_rate": 0.01, "loss": 2.0881, "step": 12669 }, { "epoch": 1.3004926108374384, "grad_norm": 0.0727803036570549, "learning_rate": 0.01, "loss": 2.0606, "step": 12672 }, { "epoch": 1.3008004926108374, "grad_norm": 0.06588024646043777, "learning_rate": 0.01, "loss": 2.0508, "step": 12675 }, { "epoch": 1.3011083743842364, "grad_norm": 0.09718842804431915, "learning_rate": 0.01, "loss": 2.0793, "step": 12678 }, { "epoch": 1.3014162561576355, "grad_norm": 0.05804411321878433, "learning_rate": 0.01, "loss": 2.0825, "step": 12681 }, { "epoch": 1.3017241379310345, "grad_norm": 0.07549803704023361, "learning_rate": 0.01, "loss": 2.0546, "step": 12684 }, { "epoch": 1.3020320197044335, "grad_norm": 0.04496621713042259, "learning_rate": 0.01, "loss": 2.0483, "step": 12687 }, { "epoch": 1.3023399014778325, "grad_norm": 0.1283668428659439, "learning_rate": 0.01, "loss": 2.0639, "step": 12690 }, { "epoch": 1.3026477832512315, "grad_norm": 0.1276516169309616, "learning_rate": 0.01, "loss": 2.062, "step": 12693 }, { "epoch": 1.3029556650246306, "grad_norm": 0.12865981459617615, "learning_rate": 0.01, "loss": 2.0697, "step": 12696 }, { "epoch": 1.3032635467980296, "grad_norm": 0.05869213864207268, "learning_rate": 0.01, "loss": 2.0955, "step": 12699 }, { "epoch": 1.3035714285714286, "grad_norm": 0.042082637548446655, "learning_rate": 0.01, "loss": 2.0847, "step": 12702 }, { "epoch": 1.3038793103448276, "grad_norm": 0.11474558711051941, "learning_rate": 0.01, "loss": 2.0767, "step": 12705 }, { "epoch": 1.3041871921182266, "grad_norm": 0.12426330894231796, "learning_rate": 0.01, "loss": 2.0787, "step": 12708 }, { "epoch": 1.3044950738916257, "grad_norm": 0.06731969118118286, "learning_rate": 0.01, "loss": 2.064, "step": 12711 }, { "epoch": 1.3048029556650247, "grad_norm": 0.049505047500133514, "learning_rate": 0.01, "loss": 2.0848, "step": 12714 }, { "epoch": 1.3051108374384237, "grad_norm": 0.04460617154836655, "learning_rate": 0.01, "loss": 2.0958, "step": 12717 }, { "epoch": 1.3054187192118227, "grad_norm": 0.05461740121245384, "learning_rate": 0.01, "loss": 2.0724, "step": 12720 }, { "epoch": 1.3057266009852218, "grad_norm": 0.04074997082352638, "learning_rate": 0.01, "loss": 2.0453, "step": 12723 }, { "epoch": 1.3060344827586206, "grad_norm": 0.04925557225942612, "learning_rate": 0.01, "loss": 2.0707, "step": 12726 }, { "epoch": 1.3063423645320196, "grad_norm": 0.06570678949356079, "learning_rate": 0.01, "loss": 2.0703, "step": 12729 }, { "epoch": 1.3066502463054186, "grad_norm": 0.076143778860569, "learning_rate": 0.01, "loss": 2.0639, "step": 12732 }, { "epoch": 1.3069581280788176, "grad_norm": 0.06691130995750427, "learning_rate": 0.01, "loss": 2.0858, "step": 12735 }, { "epoch": 1.3072660098522166, "grad_norm": 0.06733332574367523, "learning_rate": 0.01, "loss": 2.0622, "step": 12738 }, { "epoch": 1.3075738916256157, "grad_norm": 0.08008868247270584, "learning_rate": 0.01, "loss": 2.085, "step": 12741 }, { "epoch": 1.3078817733990147, "grad_norm": 0.10713468492031097, "learning_rate": 0.01, "loss": 2.0642, "step": 12744 }, { "epoch": 1.3081896551724137, "grad_norm": 0.101436547935009, "learning_rate": 0.01, "loss": 2.0835, "step": 12747 }, { "epoch": 1.3084975369458127, "grad_norm": 0.0552450455725193, "learning_rate": 0.01, "loss": 2.0553, "step": 12750 }, { "epoch": 1.3088054187192117, "grad_norm": 0.08755996823310852, "learning_rate": 0.01, "loss": 2.0564, "step": 12753 }, { "epoch": 1.3091133004926108, "grad_norm": 0.03980748727917671, "learning_rate": 0.01, "loss": 2.084, "step": 12756 }, { "epoch": 1.3094211822660098, "grad_norm": 0.0971774309873581, "learning_rate": 0.01, "loss": 2.0858, "step": 12759 }, { "epoch": 1.3097290640394088, "grad_norm": 0.05845404043793678, "learning_rate": 0.01, "loss": 2.0792, "step": 12762 }, { "epoch": 1.3100369458128078, "grad_norm": 0.08498022705316544, "learning_rate": 0.01, "loss": 2.0932, "step": 12765 }, { "epoch": 1.3103448275862069, "grad_norm": 0.05135398730635643, "learning_rate": 0.01, "loss": 2.0796, "step": 12768 }, { "epoch": 1.3106527093596059, "grad_norm": 0.048872511833906174, "learning_rate": 0.01, "loss": 2.0675, "step": 12771 }, { "epoch": 1.310960591133005, "grad_norm": 0.0529983788728714, "learning_rate": 0.01, "loss": 2.0573, "step": 12774 }, { "epoch": 1.311268472906404, "grad_norm": 0.03580658137798309, "learning_rate": 0.01, "loss": 2.0741, "step": 12777 }, { "epoch": 1.311576354679803, "grad_norm": 0.03904234617948532, "learning_rate": 0.01, "loss": 2.0691, "step": 12780 }, { "epoch": 1.311884236453202, "grad_norm": 0.04073551669716835, "learning_rate": 0.01, "loss": 2.0694, "step": 12783 }, { "epoch": 1.312192118226601, "grad_norm": 0.058973487466573715, "learning_rate": 0.01, "loss": 2.0381, "step": 12786 }, { "epoch": 1.3125, "grad_norm": 0.05673586577177048, "learning_rate": 0.01, "loss": 2.0691, "step": 12789 }, { "epoch": 1.312807881773399, "grad_norm": 0.13680770993232727, "learning_rate": 0.01, "loss": 2.0673, "step": 12792 }, { "epoch": 1.313115763546798, "grad_norm": 0.0454241968691349, "learning_rate": 0.01, "loss": 2.0766, "step": 12795 }, { "epoch": 1.313423645320197, "grad_norm": 0.04074293375015259, "learning_rate": 0.01, "loss": 2.0593, "step": 12798 }, { "epoch": 1.313731527093596, "grad_norm": 0.04174893721938133, "learning_rate": 0.01, "loss": 2.0537, "step": 12801 }, { "epoch": 1.314039408866995, "grad_norm": 0.06716062128543854, "learning_rate": 0.01, "loss": 2.0642, "step": 12804 }, { "epoch": 1.3143472906403941, "grad_norm": 0.09866861253976822, "learning_rate": 0.01, "loss": 2.0889, "step": 12807 }, { "epoch": 1.3146551724137931, "grad_norm": 0.13097235560417175, "learning_rate": 0.01, "loss": 2.0744, "step": 12810 }, { "epoch": 1.3149630541871922, "grad_norm": 0.07859724014997482, "learning_rate": 0.01, "loss": 2.0899, "step": 12813 }, { "epoch": 1.3152709359605912, "grad_norm": 0.06141912192106247, "learning_rate": 0.01, "loss": 2.0666, "step": 12816 }, { "epoch": 1.3155788177339902, "grad_norm": 0.05112985149025917, "learning_rate": 0.01, "loss": 2.0658, "step": 12819 }, { "epoch": 1.3158866995073892, "grad_norm": 0.03482404351234436, "learning_rate": 0.01, "loss": 2.0802, "step": 12822 }, { "epoch": 1.3161945812807883, "grad_norm": 0.05562854930758476, "learning_rate": 0.01, "loss": 2.0659, "step": 12825 }, { "epoch": 1.3165024630541873, "grad_norm": 0.04841645434498787, "learning_rate": 0.01, "loss": 2.058, "step": 12828 }, { "epoch": 1.3168103448275863, "grad_norm": 0.0756571963429451, "learning_rate": 0.01, "loss": 2.0283, "step": 12831 }, { "epoch": 1.3171182266009853, "grad_norm": 0.09575197845697403, "learning_rate": 0.01, "loss": 2.0709, "step": 12834 }, { "epoch": 1.3174261083743843, "grad_norm": 0.07003197073936462, "learning_rate": 0.01, "loss": 2.0409, "step": 12837 }, { "epoch": 1.3177339901477834, "grad_norm": 0.12592460215091705, "learning_rate": 0.01, "loss": 2.051, "step": 12840 }, { "epoch": 1.3180418719211824, "grad_norm": 0.07086621969938278, "learning_rate": 0.01, "loss": 2.0856, "step": 12843 }, { "epoch": 1.3183497536945814, "grad_norm": 0.07367062568664551, "learning_rate": 0.01, "loss": 2.0759, "step": 12846 }, { "epoch": 1.3186576354679804, "grad_norm": 0.06731852889060974, "learning_rate": 0.01, "loss": 2.077, "step": 12849 }, { "epoch": 1.3189655172413794, "grad_norm": 0.07499510049819946, "learning_rate": 0.01, "loss": 2.072, "step": 12852 }, { "epoch": 1.3192733990147782, "grad_norm": 0.07604499906301498, "learning_rate": 0.01, "loss": 2.0717, "step": 12855 }, { "epoch": 1.3195812807881773, "grad_norm": 0.0770401731133461, "learning_rate": 0.01, "loss": 2.0894, "step": 12858 }, { "epoch": 1.3198891625615763, "grad_norm": 0.06753168255090714, "learning_rate": 0.01, "loss": 2.0546, "step": 12861 }, { "epoch": 1.3201970443349753, "grad_norm": 0.14175836741924286, "learning_rate": 0.01, "loss": 2.0811, "step": 12864 }, { "epoch": 1.3205049261083743, "grad_norm": 0.04258207604289055, "learning_rate": 0.01, "loss": 2.0831, "step": 12867 }, { "epoch": 1.3208128078817734, "grad_norm": 0.03372815623879433, "learning_rate": 0.01, "loss": 2.0827, "step": 12870 }, { "epoch": 1.3211206896551724, "grad_norm": 0.040649689733982086, "learning_rate": 0.01, "loss": 2.067, "step": 12873 }, { "epoch": 1.3214285714285714, "grad_norm": 0.060684412717819214, "learning_rate": 0.01, "loss": 2.0701, "step": 12876 }, { "epoch": 1.3217364532019704, "grad_norm": 0.036452196538448334, "learning_rate": 0.01, "loss": 2.0726, "step": 12879 }, { "epoch": 1.3220443349753694, "grad_norm": 0.09457682818174362, "learning_rate": 0.01, "loss": 2.0709, "step": 12882 }, { "epoch": 1.3223522167487685, "grad_norm": 0.049776870757341385, "learning_rate": 0.01, "loss": 2.0354, "step": 12885 }, { "epoch": 1.3226600985221675, "grad_norm": 0.05794042348861694, "learning_rate": 0.01, "loss": 2.0781, "step": 12888 }, { "epoch": 1.3229679802955665, "grad_norm": 0.052100833505392075, "learning_rate": 0.01, "loss": 2.0801, "step": 12891 }, { "epoch": 1.3232758620689655, "grad_norm": 0.06558441370725632, "learning_rate": 0.01, "loss": 2.076, "step": 12894 }, { "epoch": 1.3235837438423645, "grad_norm": 0.1107778549194336, "learning_rate": 0.01, "loss": 2.0859, "step": 12897 }, { "epoch": 1.3238916256157636, "grad_norm": 0.052982959896326065, "learning_rate": 0.01, "loss": 2.0749, "step": 12900 }, { "epoch": 1.3241995073891626, "grad_norm": 0.0976911410689354, "learning_rate": 0.01, "loss": 2.067, "step": 12903 }, { "epoch": 1.3245073891625616, "grad_norm": 0.05620484799146652, "learning_rate": 0.01, "loss": 2.0892, "step": 12906 }, { "epoch": 1.3248152709359606, "grad_norm": 0.12753431499004364, "learning_rate": 0.01, "loss": 2.073, "step": 12909 }, { "epoch": 1.3251231527093597, "grad_norm": 0.14311249554157257, "learning_rate": 0.01, "loss": 2.0641, "step": 12912 }, { "epoch": 1.3254310344827587, "grad_norm": 0.049242276698350906, "learning_rate": 0.01, "loss": 2.0701, "step": 12915 }, { "epoch": 1.3257389162561577, "grad_norm": 0.07875422388315201, "learning_rate": 0.01, "loss": 2.0889, "step": 12918 }, { "epoch": 1.3260467980295567, "grad_norm": 0.036451369524002075, "learning_rate": 0.01, "loss": 2.0634, "step": 12921 }, { "epoch": 1.3263546798029557, "grad_norm": 0.03659353405237198, "learning_rate": 0.01, "loss": 2.0725, "step": 12924 }, { "epoch": 1.3266625615763548, "grad_norm": 0.049371130764484406, "learning_rate": 0.01, "loss": 2.0531, "step": 12927 }, { "epoch": 1.3269704433497536, "grad_norm": 0.05998126044869423, "learning_rate": 0.01, "loss": 2.0894, "step": 12930 }, { "epoch": 1.3272783251231526, "grad_norm": 0.08878383040428162, "learning_rate": 0.01, "loss": 2.0627, "step": 12933 }, { "epoch": 1.3275862068965516, "grad_norm": 0.059183619916439056, "learning_rate": 0.01, "loss": 2.0829, "step": 12936 }, { "epoch": 1.3278940886699506, "grad_norm": 0.05783310905098915, "learning_rate": 0.01, "loss": 2.0972, "step": 12939 }, { "epoch": 1.3282019704433496, "grad_norm": 0.07206647843122482, "learning_rate": 0.01, "loss": 2.0667, "step": 12942 }, { "epoch": 1.3285098522167487, "grad_norm": 0.07303550839424133, "learning_rate": 0.01, "loss": 2.0745, "step": 12945 }, { "epoch": 1.3288177339901477, "grad_norm": 0.04319525510072708, "learning_rate": 0.01, "loss": 2.0673, "step": 12948 }, { "epoch": 1.3291256157635467, "grad_norm": 0.044222913682460785, "learning_rate": 0.01, "loss": 2.0856, "step": 12951 }, { "epoch": 1.3294334975369457, "grad_norm": 0.08791283518075943, "learning_rate": 0.01, "loss": 2.0841, "step": 12954 }, { "epoch": 1.3297413793103448, "grad_norm": 0.05172525718808174, "learning_rate": 0.01, "loss": 2.0828, "step": 12957 }, { "epoch": 1.3300492610837438, "grad_norm": 0.053524646908044815, "learning_rate": 0.01, "loss": 2.0504, "step": 12960 }, { "epoch": 1.3303571428571428, "grad_norm": 0.03858666867017746, "learning_rate": 0.01, "loss": 2.1045, "step": 12963 }, { "epoch": 1.3306650246305418, "grad_norm": 0.037037089467048645, "learning_rate": 0.01, "loss": 2.0742, "step": 12966 }, { "epoch": 1.3309729064039408, "grad_norm": 0.04609520733356476, "learning_rate": 0.01, "loss": 2.0474, "step": 12969 }, { "epoch": 1.3312807881773399, "grad_norm": 0.03773853927850723, "learning_rate": 0.01, "loss": 2.0573, "step": 12972 }, { "epoch": 1.3315886699507389, "grad_norm": 0.040222879499197006, "learning_rate": 0.01, "loss": 2.073, "step": 12975 }, { "epoch": 1.331896551724138, "grad_norm": 0.11522398144006729, "learning_rate": 0.01, "loss": 2.0523, "step": 12978 }, { "epoch": 1.332204433497537, "grad_norm": 0.0496886670589447, "learning_rate": 0.01, "loss": 2.0873, "step": 12981 }, { "epoch": 1.332512315270936, "grad_norm": 0.0955866202712059, "learning_rate": 0.01, "loss": 2.0735, "step": 12984 }, { "epoch": 1.332820197044335, "grad_norm": 0.06260306388139725, "learning_rate": 0.01, "loss": 2.0696, "step": 12987 }, { "epoch": 1.333128078817734, "grad_norm": 0.049784719944000244, "learning_rate": 0.01, "loss": 2.0807, "step": 12990 }, { "epoch": 1.333435960591133, "grad_norm": 0.04571852833032608, "learning_rate": 0.01, "loss": 2.0648, "step": 12993 }, { "epoch": 1.333743842364532, "grad_norm": 0.11032246053218842, "learning_rate": 0.01, "loss": 2.0777, "step": 12996 }, { "epoch": 1.334051724137931, "grad_norm": 0.04030182585120201, "learning_rate": 0.01, "loss": 2.0491, "step": 12999 }, { "epoch": 1.33435960591133, "grad_norm": 0.06681946665048599, "learning_rate": 0.01, "loss": 2.051, "step": 13002 }, { "epoch": 1.334667487684729, "grad_norm": 0.04532696306705475, "learning_rate": 0.01, "loss": 2.0322, "step": 13005 }, { "epoch": 1.3349753694581281, "grad_norm": 0.03890594094991684, "learning_rate": 0.01, "loss": 2.0792, "step": 13008 }, { "epoch": 1.3352832512315271, "grad_norm": 0.08290864527225494, "learning_rate": 0.01, "loss": 2.0903, "step": 13011 }, { "epoch": 1.3355911330049262, "grad_norm": 0.09402919560670853, "learning_rate": 0.01, "loss": 2.0678, "step": 13014 }, { "epoch": 1.3358990147783252, "grad_norm": 0.06945643573999405, "learning_rate": 0.01, "loss": 2.0823, "step": 13017 }, { "epoch": 1.3362068965517242, "grad_norm": 0.0540471225976944, "learning_rate": 0.01, "loss": 2.0705, "step": 13020 }, { "epoch": 1.3365147783251232, "grad_norm": 0.04104168713092804, "learning_rate": 0.01, "loss": 2.0808, "step": 13023 }, { "epoch": 1.3368226600985222, "grad_norm": 0.04578167945146561, "learning_rate": 0.01, "loss": 2.0821, "step": 13026 }, { "epoch": 1.3371305418719213, "grad_norm": 0.07289981096982956, "learning_rate": 0.01, "loss": 2.0686, "step": 13029 }, { "epoch": 1.3374384236453203, "grad_norm": 0.09114310890436172, "learning_rate": 0.01, "loss": 2.0435, "step": 13032 }, { "epoch": 1.3377463054187193, "grad_norm": 0.06305088102817535, "learning_rate": 0.01, "loss": 2.0593, "step": 13035 }, { "epoch": 1.3380541871921183, "grad_norm": 0.06495746225118637, "learning_rate": 0.01, "loss": 2.0728, "step": 13038 }, { "epoch": 1.3383620689655173, "grad_norm": 0.05586539953947067, "learning_rate": 0.01, "loss": 2.0747, "step": 13041 }, { "epoch": 1.3386699507389164, "grad_norm": 0.04566524177789688, "learning_rate": 0.01, "loss": 2.0576, "step": 13044 }, { "epoch": 1.3389778325123154, "grad_norm": 0.060839373618364334, "learning_rate": 0.01, "loss": 2.0717, "step": 13047 }, { "epoch": 1.3392857142857144, "grad_norm": 0.11224903166294098, "learning_rate": 0.01, "loss": 2.0878, "step": 13050 }, { "epoch": 1.3395935960591134, "grad_norm": 0.09223728626966476, "learning_rate": 0.01, "loss": 2.0531, "step": 13053 }, { "epoch": 1.3399014778325122, "grad_norm": 0.0413731187582016, "learning_rate": 0.01, "loss": 2.0495, "step": 13056 }, { "epoch": 1.3402093596059113, "grad_norm": 0.050937358289957047, "learning_rate": 0.01, "loss": 2.0774, "step": 13059 }, { "epoch": 1.3405172413793103, "grad_norm": 0.0407971516251564, "learning_rate": 0.01, "loss": 2.0766, "step": 13062 }, { "epoch": 1.3408251231527093, "grad_norm": 0.0623883455991745, "learning_rate": 0.01, "loss": 2.0691, "step": 13065 }, { "epoch": 1.3411330049261083, "grad_norm": 0.09325427561998367, "learning_rate": 0.01, "loss": 2.0731, "step": 13068 }, { "epoch": 1.3414408866995073, "grad_norm": 0.06965765357017517, "learning_rate": 0.01, "loss": 2.0653, "step": 13071 }, { "epoch": 1.3417487684729064, "grad_norm": 0.12671297788619995, "learning_rate": 0.01, "loss": 2.1028, "step": 13074 }, { "epoch": 1.3420566502463054, "grad_norm": 0.04154878482222557, "learning_rate": 0.01, "loss": 2.0783, "step": 13077 }, { "epoch": 1.3423645320197044, "grad_norm": 0.04698561131954193, "learning_rate": 0.01, "loss": 2.0799, "step": 13080 }, { "epoch": 1.3426724137931034, "grad_norm": 0.031127501279115677, "learning_rate": 0.01, "loss": 2.0756, "step": 13083 }, { "epoch": 1.3429802955665024, "grad_norm": 0.05258537083864212, "learning_rate": 0.01, "loss": 2.0821, "step": 13086 }, { "epoch": 1.3432881773399015, "grad_norm": 0.06848637759685516, "learning_rate": 0.01, "loss": 2.0629, "step": 13089 }, { "epoch": 1.3435960591133005, "grad_norm": 0.07738485932350159, "learning_rate": 0.01, "loss": 2.0674, "step": 13092 }, { "epoch": 1.3439039408866995, "grad_norm": 0.09635680168867111, "learning_rate": 0.01, "loss": 2.0782, "step": 13095 }, { "epoch": 1.3442118226600985, "grad_norm": 0.04388611391186714, "learning_rate": 0.01, "loss": 2.0575, "step": 13098 }, { "epoch": 1.3445197044334976, "grad_norm": 0.0776490792632103, "learning_rate": 0.01, "loss": 2.0753, "step": 13101 }, { "epoch": 1.3448275862068966, "grad_norm": 0.11331035196781158, "learning_rate": 0.01, "loss": 2.053, "step": 13104 }, { "epoch": 1.3451354679802956, "grad_norm": 0.04267279431223869, "learning_rate": 0.01, "loss": 2.0812, "step": 13107 }, { "epoch": 1.3454433497536946, "grad_norm": 0.05454112961888313, "learning_rate": 0.01, "loss": 2.0711, "step": 13110 }, { "epoch": 1.3457512315270936, "grad_norm": 0.07470305263996124, "learning_rate": 0.01, "loss": 2.0878, "step": 13113 }, { "epoch": 1.3460591133004927, "grad_norm": 0.057337477803230286, "learning_rate": 0.01, "loss": 2.0607, "step": 13116 }, { "epoch": 1.3463669950738917, "grad_norm": 0.09155120700597763, "learning_rate": 0.01, "loss": 2.0898, "step": 13119 }, { "epoch": 1.3466748768472907, "grad_norm": 0.09644894301891327, "learning_rate": 0.01, "loss": 2.0905, "step": 13122 }, { "epoch": 1.3469827586206897, "grad_norm": 0.0579628124833107, "learning_rate": 0.01, "loss": 2.0834, "step": 13125 }, { "epoch": 1.3472906403940887, "grad_norm": 0.09968624264001846, "learning_rate": 0.01, "loss": 2.0797, "step": 13128 }, { "epoch": 1.3475985221674878, "grad_norm": 0.04834052175283432, "learning_rate": 0.01, "loss": 2.0552, "step": 13131 }, { "epoch": 1.3479064039408866, "grad_norm": 0.05561887100338936, "learning_rate": 0.01, "loss": 2.0564, "step": 13134 }, { "epoch": 1.3482142857142856, "grad_norm": 0.14990638196468353, "learning_rate": 0.01, "loss": 2.0738, "step": 13137 }, { "epoch": 1.3485221674876846, "grad_norm": 0.07530777156352997, "learning_rate": 0.01, "loss": 2.0765, "step": 13140 }, { "epoch": 1.3488300492610836, "grad_norm": 0.09080106765031815, "learning_rate": 0.01, "loss": 2.083, "step": 13143 }, { "epoch": 1.3491379310344827, "grad_norm": 0.042014699429273605, "learning_rate": 0.01, "loss": 2.0608, "step": 13146 }, { "epoch": 1.3494458128078817, "grad_norm": 0.08905219286680222, "learning_rate": 0.01, "loss": 2.0657, "step": 13149 }, { "epoch": 1.3497536945812807, "grad_norm": 0.1093059629201889, "learning_rate": 0.01, "loss": 2.102, "step": 13152 }, { "epoch": 1.3500615763546797, "grad_norm": 0.09834617376327515, "learning_rate": 0.01, "loss": 2.0601, "step": 13155 }, { "epoch": 1.3503694581280787, "grad_norm": 0.0754542201757431, "learning_rate": 0.01, "loss": 2.103, "step": 13158 }, { "epoch": 1.3506773399014778, "grad_norm": 0.09639342129230499, "learning_rate": 0.01, "loss": 2.0743, "step": 13161 }, { "epoch": 1.3509852216748768, "grad_norm": 0.05084405094385147, "learning_rate": 0.01, "loss": 2.0898, "step": 13164 }, { "epoch": 1.3512931034482758, "grad_norm": 0.04796381667256355, "learning_rate": 0.01, "loss": 2.0788, "step": 13167 }, { "epoch": 1.3516009852216748, "grad_norm": 0.05373486131429672, "learning_rate": 0.01, "loss": 2.0632, "step": 13170 }, { "epoch": 1.3519088669950738, "grad_norm": 0.05145580321550369, "learning_rate": 0.01, "loss": 2.0383, "step": 13173 }, { "epoch": 1.3522167487684729, "grad_norm": 0.03861214593052864, "learning_rate": 0.01, "loss": 2.0678, "step": 13176 }, { "epoch": 1.3525246305418719, "grad_norm": 0.04394346475601196, "learning_rate": 0.01, "loss": 2.0601, "step": 13179 }, { "epoch": 1.352832512315271, "grad_norm": 0.08851804584264755, "learning_rate": 0.01, "loss": 2.0374, "step": 13182 }, { "epoch": 1.35314039408867, "grad_norm": 0.059799451380968094, "learning_rate": 0.01, "loss": 2.0735, "step": 13185 }, { "epoch": 1.353448275862069, "grad_norm": 0.13764169812202454, "learning_rate": 0.01, "loss": 2.1001, "step": 13188 }, { "epoch": 1.353756157635468, "grad_norm": 0.05652278661727905, "learning_rate": 0.01, "loss": 2.0771, "step": 13191 }, { "epoch": 1.354064039408867, "grad_norm": 0.04755775257945061, "learning_rate": 0.01, "loss": 2.0467, "step": 13194 }, { "epoch": 1.354371921182266, "grad_norm": 0.058131635189056396, "learning_rate": 0.01, "loss": 2.0884, "step": 13197 }, { "epoch": 1.354679802955665, "grad_norm": 0.041266053915023804, "learning_rate": 0.01, "loss": 2.0684, "step": 13200 }, { "epoch": 1.354987684729064, "grad_norm": 0.034990034997463226, "learning_rate": 0.01, "loss": 2.0576, "step": 13203 }, { "epoch": 1.355295566502463, "grad_norm": 0.13107064366340637, "learning_rate": 0.01, "loss": 2.0616, "step": 13206 }, { "epoch": 1.355603448275862, "grad_norm": 0.05397200584411621, "learning_rate": 0.01, "loss": 2.0953, "step": 13209 }, { "epoch": 1.3559113300492611, "grad_norm": 0.04137737303972244, "learning_rate": 0.01, "loss": 2.0538, "step": 13212 }, { "epoch": 1.3562192118226601, "grad_norm": 0.05001407861709595, "learning_rate": 0.01, "loss": 2.0809, "step": 13215 }, { "epoch": 1.3565270935960592, "grad_norm": 0.10387953370809555, "learning_rate": 0.01, "loss": 2.0818, "step": 13218 }, { "epoch": 1.3568349753694582, "grad_norm": 0.052998363971710205, "learning_rate": 0.01, "loss": 2.0711, "step": 13221 }, { "epoch": 1.3571428571428572, "grad_norm": 0.06805765628814697, "learning_rate": 0.01, "loss": 2.0604, "step": 13224 }, { "epoch": 1.3574507389162562, "grad_norm": 0.06597940623760223, "learning_rate": 0.01, "loss": 2.0701, "step": 13227 }, { "epoch": 1.3577586206896552, "grad_norm": 0.10083628445863724, "learning_rate": 0.01, "loss": 2.086, "step": 13230 }, { "epoch": 1.3580665024630543, "grad_norm": 0.05467986315488815, "learning_rate": 0.01, "loss": 2.072, "step": 13233 }, { "epoch": 1.3583743842364533, "grad_norm": 0.08951261639595032, "learning_rate": 0.01, "loss": 2.1256, "step": 13236 }, { "epoch": 1.3586822660098523, "grad_norm": 0.052532244473695755, "learning_rate": 0.01, "loss": 2.0555, "step": 13239 }, { "epoch": 1.3589901477832513, "grad_norm": 0.038159146904945374, "learning_rate": 0.01, "loss": 2.062, "step": 13242 }, { "epoch": 1.3592980295566504, "grad_norm": 0.09895820915699005, "learning_rate": 0.01, "loss": 2.0559, "step": 13245 }, { "epoch": 1.3596059113300494, "grad_norm": 0.07522387057542801, "learning_rate": 0.01, "loss": 2.0634, "step": 13248 }, { "epoch": 1.3599137931034484, "grad_norm": 0.04762687534093857, "learning_rate": 0.01, "loss": 2.0848, "step": 13251 }, { "epoch": 1.3602216748768474, "grad_norm": 0.0716032013297081, "learning_rate": 0.01, "loss": 2.0783, "step": 13254 }, { "epoch": 1.3605295566502464, "grad_norm": 0.08518968522548676, "learning_rate": 0.01, "loss": 2.0818, "step": 13257 }, { "epoch": 1.3608374384236452, "grad_norm": 0.06927520781755447, "learning_rate": 0.01, "loss": 2.0728, "step": 13260 }, { "epoch": 1.3611453201970443, "grad_norm": 0.10368376970291138, "learning_rate": 0.01, "loss": 2.0419, "step": 13263 }, { "epoch": 1.3614532019704433, "grad_norm": 0.04249117895960808, "learning_rate": 0.01, "loss": 2.0695, "step": 13266 }, { "epoch": 1.3617610837438423, "grad_norm": 0.06504488736391068, "learning_rate": 0.01, "loss": 2.0658, "step": 13269 }, { "epoch": 1.3620689655172413, "grad_norm": 0.03990466147661209, "learning_rate": 0.01, "loss": 2.0796, "step": 13272 }, { "epoch": 1.3623768472906403, "grad_norm": 0.042559072375297546, "learning_rate": 0.01, "loss": 2.0643, "step": 13275 }, { "epoch": 1.3626847290640394, "grad_norm": 0.046650230884552, "learning_rate": 0.01, "loss": 2.0756, "step": 13278 }, { "epoch": 1.3629926108374384, "grad_norm": 0.08641167730093002, "learning_rate": 0.01, "loss": 2.0638, "step": 13281 }, { "epoch": 1.3633004926108374, "grad_norm": 0.11438708007335663, "learning_rate": 0.01, "loss": 2.0559, "step": 13284 }, { "epoch": 1.3636083743842364, "grad_norm": 0.05360870808362961, "learning_rate": 0.01, "loss": 2.0534, "step": 13287 }, { "epoch": 1.3639162561576355, "grad_norm": 0.07226021587848663, "learning_rate": 0.01, "loss": 2.0569, "step": 13290 }, { "epoch": 1.3642241379310345, "grad_norm": 0.04532739892601967, "learning_rate": 0.01, "loss": 2.0504, "step": 13293 }, { "epoch": 1.3645320197044335, "grad_norm": 0.06119906157255173, "learning_rate": 0.01, "loss": 2.0507, "step": 13296 }, { "epoch": 1.3648399014778325, "grad_norm": 0.05576052516698837, "learning_rate": 0.01, "loss": 2.0454, "step": 13299 }, { "epoch": 1.3651477832512315, "grad_norm": 0.038748834282159805, "learning_rate": 0.01, "loss": 2.0632, "step": 13302 }, { "epoch": 1.3654556650246306, "grad_norm": 0.09733711183071136, "learning_rate": 0.01, "loss": 2.0733, "step": 13305 }, { "epoch": 1.3657635467980296, "grad_norm": 0.043375931680202484, "learning_rate": 0.01, "loss": 2.0629, "step": 13308 }, { "epoch": 1.3660714285714286, "grad_norm": 0.11930018663406372, "learning_rate": 0.01, "loss": 2.0931, "step": 13311 }, { "epoch": 1.3663793103448276, "grad_norm": 0.06754540652036667, "learning_rate": 0.01, "loss": 2.0575, "step": 13314 }, { "epoch": 1.3666871921182266, "grad_norm": 0.07226148992776871, "learning_rate": 0.01, "loss": 2.0786, "step": 13317 }, { "epoch": 1.3669950738916257, "grad_norm": 0.08159705251455307, "learning_rate": 0.01, "loss": 2.0594, "step": 13320 }, { "epoch": 1.3673029556650247, "grad_norm": 0.044994477182626724, "learning_rate": 0.01, "loss": 2.0782, "step": 13323 }, { "epoch": 1.3676108374384237, "grad_norm": 0.05308050662279129, "learning_rate": 0.01, "loss": 2.0645, "step": 13326 }, { "epoch": 1.3679187192118227, "grad_norm": 0.09141236543655396, "learning_rate": 0.01, "loss": 2.0801, "step": 13329 }, { "epoch": 1.3682266009852218, "grad_norm": 0.040702883154153824, "learning_rate": 0.01, "loss": 2.0497, "step": 13332 }, { "epoch": 1.3685344827586206, "grad_norm": 0.06111524999141693, "learning_rate": 0.01, "loss": 2.0633, "step": 13335 }, { "epoch": 1.3688423645320196, "grad_norm": 0.10802275687456131, "learning_rate": 0.01, "loss": 2.0541, "step": 13338 }, { "epoch": 1.3691502463054186, "grad_norm": 0.09049344807863235, "learning_rate": 0.01, "loss": 2.0636, "step": 13341 }, { "epoch": 1.3694581280788176, "grad_norm": 0.055894456803798676, "learning_rate": 0.01, "loss": 2.0675, "step": 13344 }, { "epoch": 1.3697660098522166, "grad_norm": 0.054729919880628586, "learning_rate": 0.01, "loss": 2.0674, "step": 13347 }, { "epoch": 1.3700738916256157, "grad_norm": 0.05745011568069458, "learning_rate": 0.01, "loss": 2.0707, "step": 13350 }, { "epoch": 1.3703817733990147, "grad_norm": 0.06573651731014252, "learning_rate": 0.01, "loss": 2.0493, "step": 13353 }, { "epoch": 1.3706896551724137, "grad_norm": 0.20495210587978363, "learning_rate": 0.01, "loss": 2.0798, "step": 13356 }, { "epoch": 1.3709975369458127, "grad_norm": 0.10678639262914658, "learning_rate": 0.01, "loss": 2.0499, "step": 13359 }, { "epoch": 1.3713054187192117, "grad_norm": 0.10948281735181808, "learning_rate": 0.01, "loss": 2.087, "step": 13362 }, { "epoch": 1.3716133004926108, "grad_norm": 0.07788719981908798, "learning_rate": 0.01, "loss": 2.0745, "step": 13365 }, { "epoch": 1.3719211822660098, "grad_norm": 0.04947768524289131, "learning_rate": 0.01, "loss": 2.0608, "step": 13368 }, { "epoch": 1.3722290640394088, "grad_norm": 0.04789843037724495, "learning_rate": 0.01, "loss": 2.0607, "step": 13371 }, { "epoch": 1.3725369458128078, "grad_norm": 0.05217898637056351, "learning_rate": 0.01, "loss": 2.058, "step": 13374 }, { "epoch": 1.3728448275862069, "grad_norm": 0.04018987715244293, "learning_rate": 0.01, "loss": 2.0782, "step": 13377 }, { "epoch": 1.3731527093596059, "grad_norm": 0.035446904599666595, "learning_rate": 0.01, "loss": 2.0668, "step": 13380 }, { "epoch": 1.373460591133005, "grad_norm": 0.038600854575634, "learning_rate": 0.01, "loss": 2.0865, "step": 13383 }, { "epoch": 1.373768472906404, "grad_norm": 0.055341143161058426, "learning_rate": 0.01, "loss": 2.0591, "step": 13386 }, { "epoch": 1.374076354679803, "grad_norm": 0.11673317849636078, "learning_rate": 0.01, "loss": 2.0897, "step": 13389 }, { "epoch": 1.374384236453202, "grad_norm": 0.06797752529382706, "learning_rate": 0.01, "loss": 2.0676, "step": 13392 }, { "epoch": 1.374692118226601, "grad_norm": 0.041885413229465485, "learning_rate": 0.01, "loss": 2.0245, "step": 13395 }, { "epoch": 1.375, "grad_norm": 0.07391481846570969, "learning_rate": 0.01, "loss": 2.067, "step": 13398 }, { "epoch": 1.375307881773399, "grad_norm": 0.07959283888339996, "learning_rate": 0.01, "loss": 2.0192, "step": 13401 }, { "epoch": 1.375615763546798, "grad_norm": 0.09504754096269608, "learning_rate": 0.01, "loss": 2.0841, "step": 13404 }, { "epoch": 1.375923645320197, "grad_norm": 0.08874071389436722, "learning_rate": 0.01, "loss": 2.0473, "step": 13407 }, { "epoch": 1.376231527093596, "grad_norm": 0.05350710079073906, "learning_rate": 0.01, "loss": 2.0763, "step": 13410 }, { "epoch": 1.376539408866995, "grad_norm": 0.06738609820604324, "learning_rate": 0.01, "loss": 2.0782, "step": 13413 }, { "epoch": 1.3768472906403941, "grad_norm": 0.04335073009133339, "learning_rate": 0.01, "loss": 2.0601, "step": 13416 }, { "epoch": 1.3771551724137931, "grad_norm": 0.056045398116111755, "learning_rate": 0.01, "loss": 2.0588, "step": 13419 }, { "epoch": 1.3774630541871922, "grad_norm": 0.06593155860900879, "learning_rate": 0.01, "loss": 2.0637, "step": 13422 }, { "epoch": 1.3777709359605912, "grad_norm": 0.08942624181509018, "learning_rate": 0.01, "loss": 2.0724, "step": 13425 }, { "epoch": 1.3780788177339902, "grad_norm": 0.10098058730363846, "learning_rate": 0.01, "loss": 2.064, "step": 13428 }, { "epoch": 1.3783866995073892, "grad_norm": 0.04804680123925209, "learning_rate": 0.01, "loss": 2.0684, "step": 13431 }, { "epoch": 1.3786945812807883, "grad_norm": 0.08567364513874054, "learning_rate": 0.01, "loss": 2.0617, "step": 13434 }, { "epoch": 1.3790024630541873, "grad_norm": 0.06027091667056084, "learning_rate": 0.01, "loss": 2.0666, "step": 13437 }, { "epoch": 1.3793103448275863, "grad_norm": 0.08809462189674377, "learning_rate": 0.01, "loss": 2.0862, "step": 13440 }, { "epoch": 1.3796182266009853, "grad_norm": 0.053466469049453735, "learning_rate": 0.01, "loss": 2.0497, "step": 13443 }, { "epoch": 1.3799261083743843, "grad_norm": 0.033619511872529984, "learning_rate": 0.01, "loss": 2.0298, "step": 13446 }, { "epoch": 1.3802339901477834, "grad_norm": 0.04768878221511841, "learning_rate": 0.01, "loss": 2.054, "step": 13449 }, { "epoch": 1.3805418719211824, "grad_norm": 0.07854757457971573, "learning_rate": 0.01, "loss": 2.0538, "step": 13452 }, { "epoch": 1.3808497536945814, "grad_norm": 0.05409559607505798, "learning_rate": 0.01, "loss": 2.082, "step": 13455 }, { "epoch": 1.3811576354679804, "grad_norm": 0.057855118066072464, "learning_rate": 0.01, "loss": 2.0814, "step": 13458 }, { "epoch": 1.3814655172413794, "grad_norm": 0.047502126544713974, "learning_rate": 0.01, "loss": 2.0955, "step": 13461 }, { "epoch": 1.3817733990147782, "grad_norm": 0.040939487516880035, "learning_rate": 0.01, "loss": 2.0431, "step": 13464 }, { "epoch": 1.3820812807881773, "grad_norm": 0.1307850480079651, "learning_rate": 0.01, "loss": 2.0801, "step": 13467 }, { "epoch": 1.3823891625615763, "grad_norm": 0.04386845603585243, "learning_rate": 0.01, "loss": 2.0662, "step": 13470 }, { "epoch": 1.3826970443349753, "grad_norm": 0.08174968510866165, "learning_rate": 0.01, "loss": 2.0544, "step": 13473 }, { "epoch": 1.3830049261083743, "grad_norm": 0.1113237589597702, "learning_rate": 0.01, "loss": 2.0411, "step": 13476 }, { "epoch": 1.3833128078817734, "grad_norm": 0.06756308674812317, "learning_rate": 0.01, "loss": 2.0813, "step": 13479 }, { "epoch": 1.3836206896551724, "grad_norm": 0.05931835621595383, "learning_rate": 0.01, "loss": 2.0544, "step": 13482 }, { "epoch": 1.3839285714285714, "grad_norm": 0.043539129197597504, "learning_rate": 0.01, "loss": 2.0528, "step": 13485 }, { "epoch": 1.3842364532019704, "grad_norm": 0.0510721355676651, "learning_rate": 0.01, "loss": 2.075, "step": 13488 }, { "epoch": 1.3845443349753694, "grad_norm": 0.10811501741409302, "learning_rate": 0.01, "loss": 2.0626, "step": 13491 }, { "epoch": 1.3848522167487685, "grad_norm": 0.08322811871767044, "learning_rate": 0.01, "loss": 2.0882, "step": 13494 }, { "epoch": 1.3851600985221675, "grad_norm": 0.05101979896426201, "learning_rate": 0.01, "loss": 2.0677, "step": 13497 }, { "epoch": 1.3854679802955665, "grad_norm": 0.036535851657390594, "learning_rate": 0.01, "loss": 2.0791, "step": 13500 }, { "epoch": 1.3857758620689655, "grad_norm": 0.05161239951848984, "learning_rate": 0.01, "loss": 2.0513, "step": 13503 }, { "epoch": 1.3860837438423645, "grad_norm": 0.06677153706550598, "learning_rate": 0.01, "loss": 2.0581, "step": 13506 }, { "epoch": 1.3863916256157636, "grad_norm": 0.04239841178059578, "learning_rate": 0.01, "loss": 2.0383, "step": 13509 }, { "epoch": 1.3866995073891626, "grad_norm": 0.04252205416560173, "learning_rate": 0.01, "loss": 2.0875, "step": 13512 }, { "epoch": 1.3870073891625616, "grad_norm": 0.10147176682949066, "learning_rate": 0.01, "loss": 2.0631, "step": 13515 }, { "epoch": 1.3873152709359606, "grad_norm": 0.046371154487133026, "learning_rate": 0.01, "loss": 2.0784, "step": 13518 }, { "epoch": 1.3876231527093597, "grad_norm": 0.0997064933180809, "learning_rate": 0.01, "loss": 2.0578, "step": 13521 }, { "epoch": 1.3879310344827587, "grad_norm": 0.05733582749962807, "learning_rate": 0.01, "loss": 2.0742, "step": 13524 }, { "epoch": 1.3882389162561577, "grad_norm": 0.05061260983347893, "learning_rate": 0.01, "loss": 2.0448, "step": 13527 }, { "epoch": 1.3885467980295567, "grad_norm": 0.04336051642894745, "learning_rate": 0.01, "loss": 2.0846, "step": 13530 }, { "epoch": 1.3888546798029557, "grad_norm": 0.04115337133407593, "learning_rate": 0.01, "loss": 2.0577, "step": 13533 }, { "epoch": 1.3891625615763548, "grad_norm": 0.04914069175720215, "learning_rate": 0.01, "loss": 2.0815, "step": 13536 }, { "epoch": 1.3894704433497536, "grad_norm": 0.0677042305469513, "learning_rate": 0.01, "loss": 2.0754, "step": 13539 }, { "epoch": 1.3897783251231526, "grad_norm": 0.04985208064317703, "learning_rate": 0.01, "loss": 2.0633, "step": 13542 }, { "epoch": 1.3900862068965516, "grad_norm": 0.08042199164628983, "learning_rate": 0.01, "loss": 2.0828, "step": 13545 }, { "epoch": 1.3903940886699506, "grad_norm": 0.03648814186453819, "learning_rate": 0.01, "loss": 2.0669, "step": 13548 }, { "epoch": 1.3907019704433496, "grad_norm": 0.03645399957895279, "learning_rate": 0.01, "loss": 2.0495, "step": 13551 }, { "epoch": 1.3910098522167487, "grad_norm": 0.04866683483123779, "learning_rate": 0.01, "loss": 2.0531, "step": 13554 }, { "epoch": 1.3913177339901477, "grad_norm": 0.07728299498558044, "learning_rate": 0.01, "loss": 2.0671, "step": 13557 }, { "epoch": 1.3916256157635467, "grad_norm": 0.12097810208797455, "learning_rate": 0.01, "loss": 2.0935, "step": 13560 }, { "epoch": 1.3919334975369457, "grad_norm": 0.12485776096582413, "learning_rate": 0.01, "loss": 2.0712, "step": 13563 }, { "epoch": 1.3922413793103448, "grad_norm": 0.053524475544691086, "learning_rate": 0.01, "loss": 2.0595, "step": 13566 }, { "epoch": 1.3925492610837438, "grad_norm": 0.04277713969349861, "learning_rate": 0.01, "loss": 2.0791, "step": 13569 }, { "epoch": 1.3928571428571428, "grad_norm": 0.09847384691238403, "learning_rate": 0.01, "loss": 2.0754, "step": 13572 }, { "epoch": 1.3931650246305418, "grad_norm": 0.03410463035106659, "learning_rate": 0.01, "loss": 2.0556, "step": 13575 }, { "epoch": 1.3934729064039408, "grad_norm": 0.10606678575277328, "learning_rate": 0.01, "loss": 2.0637, "step": 13578 }, { "epoch": 1.3937807881773399, "grad_norm": 0.06554549187421799, "learning_rate": 0.01, "loss": 2.0549, "step": 13581 }, { "epoch": 1.3940886699507389, "grad_norm": 0.07487329095602036, "learning_rate": 0.01, "loss": 2.0645, "step": 13584 }, { "epoch": 1.394396551724138, "grad_norm": 0.07526996731758118, "learning_rate": 0.01, "loss": 2.0733, "step": 13587 }, { "epoch": 1.394704433497537, "grad_norm": 0.0581665076315403, "learning_rate": 0.01, "loss": 2.0491, "step": 13590 }, { "epoch": 1.395012315270936, "grad_norm": 0.057513732463121414, "learning_rate": 0.01, "loss": 2.087, "step": 13593 }, { "epoch": 1.395320197044335, "grad_norm": 0.037192508578300476, "learning_rate": 0.01, "loss": 2.0487, "step": 13596 }, { "epoch": 1.395628078817734, "grad_norm": 0.0965125560760498, "learning_rate": 0.01, "loss": 2.0549, "step": 13599 }, { "epoch": 1.395935960591133, "grad_norm": 0.04594407603144646, "learning_rate": 0.01, "loss": 2.067, "step": 13602 }, { "epoch": 1.396243842364532, "grad_norm": 0.08442319929599762, "learning_rate": 0.01, "loss": 2.0627, "step": 13605 }, { "epoch": 1.396551724137931, "grad_norm": 0.08673713356256485, "learning_rate": 0.01, "loss": 2.054, "step": 13608 }, { "epoch": 1.39685960591133, "grad_norm": 0.07299968600273132, "learning_rate": 0.01, "loss": 2.0672, "step": 13611 }, { "epoch": 1.397167487684729, "grad_norm": 0.052630744874477386, "learning_rate": 0.01, "loss": 2.079, "step": 13614 }, { "epoch": 1.3974753694581281, "grad_norm": 0.0626215934753418, "learning_rate": 0.01, "loss": 2.0732, "step": 13617 }, { "epoch": 1.3977832512315271, "grad_norm": 0.0866907387971878, "learning_rate": 0.01, "loss": 2.0743, "step": 13620 }, { "epoch": 1.3980911330049262, "grad_norm": 0.05650071054697037, "learning_rate": 0.01, "loss": 2.0856, "step": 13623 }, { "epoch": 1.3983990147783252, "grad_norm": 0.07526635378599167, "learning_rate": 0.01, "loss": 2.0707, "step": 13626 }, { "epoch": 1.3987068965517242, "grad_norm": 0.07472112774848938, "learning_rate": 0.01, "loss": 2.0608, "step": 13629 }, { "epoch": 1.3990147783251232, "grad_norm": 0.07251216471195221, "learning_rate": 0.01, "loss": 2.086, "step": 13632 }, { "epoch": 1.3993226600985222, "grad_norm": 0.08701921999454498, "learning_rate": 0.01, "loss": 2.0382, "step": 13635 }, { "epoch": 1.3996305418719213, "grad_norm": 0.033323436975479126, "learning_rate": 0.01, "loss": 2.0716, "step": 13638 }, { "epoch": 1.3999384236453203, "grad_norm": 0.04960713908076286, "learning_rate": 0.01, "loss": 2.0557, "step": 13641 }, { "epoch": 1.4002463054187193, "grad_norm": 0.1418198049068451, "learning_rate": 0.01, "loss": 2.0528, "step": 13644 }, { "epoch": 1.4005541871921183, "grad_norm": 0.06056910380721092, "learning_rate": 0.01, "loss": 2.0825, "step": 13647 }, { "epoch": 1.4008620689655173, "grad_norm": 0.062474992126226425, "learning_rate": 0.01, "loss": 2.0195, "step": 13650 }, { "epoch": 1.4011699507389164, "grad_norm": 0.05380658805370331, "learning_rate": 0.01, "loss": 2.0644, "step": 13653 }, { "epoch": 1.4014778325123154, "grad_norm": 0.046230513602495193, "learning_rate": 0.01, "loss": 2.062, "step": 13656 }, { "epoch": 1.4017857142857144, "grad_norm": 0.05238807573914528, "learning_rate": 0.01, "loss": 2.0976, "step": 13659 }, { "epoch": 1.4020935960591134, "grad_norm": 0.045423392206430435, "learning_rate": 0.01, "loss": 2.0729, "step": 13662 }, { "epoch": 1.4024014778325122, "grad_norm": 0.1077577993273735, "learning_rate": 0.01, "loss": 2.0481, "step": 13665 }, { "epoch": 1.4027093596059113, "grad_norm": 0.04370421916246414, "learning_rate": 0.01, "loss": 2.0634, "step": 13668 }, { "epoch": 1.4030172413793103, "grad_norm": 0.061496302485466, "learning_rate": 0.01, "loss": 2.0449, "step": 13671 }, { "epoch": 1.4033251231527093, "grad_norm": 0.048742834478616714, "learning_rate": 0.01, "loss": 2.0683, "step": 13674 }, { "epoch": 1.4036330049261083, "grad_norm": 0.14942513406276703, "learning_rate": 0.01, "loss": 2.0616, "step": 13677 }, { "epoch": 1.4039408866995073, "grad_norm": 0.04235846549272537, "learning_rate": 0.01, "loss": 2.0611, "step": 13680 }, { "epoch": 1.4042487684729064, "grad_norm": 0.05509978160262108, "learning_rate": 0.01, "loss": 2.0645, "step": 13683 }, { "epoch": 1.4045566502463054, "grad_norm": 0.09692233055830002, "learning_rate": 0.01, "loss": 2.0703, "step": 13686 }, { "epoch": 1.4048645320197044, "grad_norm": 0.11141908913850784, "learning_rate": 0.01, "loss": 2.0724, "step": 13689 }, { "epoch": 1.4051724137931034, "grad_norm": 0.06601562350988388, "learning_rate": 0.01, "loss": 2.0719, "step": 13692 }, { "epoch": 1.4054802955665024, "grad_norm": 0.04997260868549347, "learning_rate": 0.01, "loss": 2.056, "step": 13695 }, { "epoch": 1.4057881773399015, "grad_norm": 0.07198163866996765, "learning_rate": 0.01, "loss": 2.0546, "step": 13698 }, { "epoch": 1.4060960591133005, "grad_norm": 0.03802650794386864, "learning_rate": 0.01, "loss": 2.0696, "step": 13701 }, { "epoch": 1.4064039408866995, "grad_norm": 0.06259030848741531, "learning_rate": 0.01, "loss": 2.0459, "step": 13704 }, { "epoch": 1.4067118226600985, "grad_norm": 0.09554235637187958, "learning_rate": 0.01, "loss": 2.0476, "step": 13707 }, { "epoch": 1.4070197044334976, "grad_norm": 0.056935038417577744, "learning_rate": 0.01, "loss": 2.0522, "step": 13710 }, { "epoch": 1.4073275862068966, "grad_norm": 0.11038411408662796, "learning_rate": 0.01, "loss": 2.0567, "step": 13713 }, { "epoch": 1.4076354679802956, "grad_norm": 0.05257488042116165, "learning_rate": 0.01, "loss": 2.09, "step": 13716 }, { "epoch": 1.4079433497536946, "grad_norm": 0.0573866032063961, "learning_rate": 0.01, "loss": 2.0538, "step": 13719 }, { "epoch": 1.4082512315270936, "grad_norm": 0.04933631047606468, "learning_rate": 0.01, "loss": 2.0435, "step": 13722 }, { "epoch": 1.4085591133004927, "grad_norm": 0.05909980088472366, "learning_rate": 0.01, "loss": 2.0554, "step": 13725 }, { "epoch": 1.4088669950738917, "grad_norm": 0.09598751366138458, "learning_rate": 0.01, "loss": 2.0718, "step": 13728 }, { "epoch": 1.4091748768472907, "grad_norm": 0.05608231574296951, "learning_rate": 0.01, "loss": 2.0621, "step": 13731 }, { "epoch": 1.4094827586206897, "grad_norm": 0.08262834697961807, "learning_rate": 0.01, "loss": 2.0661, "step": 13734 }, { "epoch": 1.4097906403940887, "grad_norm": 0.041144959628582, "learning_rate": 0.01, "loss": 2.0646, "step": 13737 }, { "epoch": 1.4100985221674878, "grad_norm": 0.03748650476336479, "learning_rate": 0.01, "loss": 2.0558, "step": 13740 }, { "epoch": 1.4104064039408866, "grad_norm": 0.04054822400212288, "learning_rate": 0.01, "loss": 2.0564, "step": 13743 }, { "epoch": 1.4107142857142856, "grad_norm": 0.07961263507604599, "learning_rate": 0.01, "loss": 2.0513, "step": 13746 }, { "epoch": 1.4110221674876846, "grad_norm": 0.049971841275691986, "learning_rate": 0.01, "loss": 2.033, "step": 13749 }, { "epoch": 1.4113300492610836, "grad_norm": 0.040059734135866165, "learning_rate": 0.01, "loss": 2.0791, "step": 13752 }, { "epoch": 1.4116379310344827, "grad_norm": 0.0400179885327816, "learning_rate": 0.01, "loss": 2.0496, "step": 13755 }, { "epoch": 1.4119458128078817, "grad_norm": 0.04587862268090248, "learning_rate": 0.01, "loss": 2.0572, "step": 13758 }, { "epoch": 1.4122536945812807, "grad_norm": 0.08982817828655243, "learning_rate": 0.01, "loss": 2.0864, "step": 13761 }, { "epoch": 1.4125615763546797, "grad_norm": 0.05488836392760277, "learning_rate": 0.01, "loss": 2.0529, "step": 13764 }, { "epoch": 1.4128694581280787, "grad_norm": 0.06559593975543976, "learning_rate": 0.01, "loss": 2.0564, "step": 13767 }, { "epoch": 1.4131773399014778, "grad_norm": 0.10647718608379364, "learning_rate": 0.01, "loss": 2.0404, "step": 13770 }, { "epoch": 1.4134852216748768, "grad_norm": 0.05944173410534859, "learning_rate": 0.01, "loss": 2.053, "step": 13773 }, { "epoch": 1.4137931034482758, "grad_norm": 0.05548718199133873, "learning_rate": 0.01, "loss": 2.0534, "step": 13776 }, { "epoch": 1.4141009852216748, "grad_norm": 0.0694265142083168, "learning_rate": 0.01, "loss": 2.0647, "step": 13779 }, { "epoch": 1.4144088669950738, "grad_norm": 0.10526683181524277, "learning_rate": 0.01, "loss": 2.0768, "step": 13782 }, { "epoch": 1.4147167487684729, "grad_norm": 0.08820123970508575, "learning_rate": 0.01, "loss": 2.0693, "step": 13785 }, { "epoch": 1.4150246305418719, "grad_norm": 0.04513731971383095, "learning_rate": 0.01, "loss": 2.0596, "step": 13788 }, { "epoch": 1.415332512315271, "grad_norm": 0.05737076327204704, "learning_rate": 0.01, "loss": 2.0698, "step": 13791 }, { "epoch": 1.41564039408867, "grad_norm": 0.0431799478828907, "learning_rate": 0.01, "loss": 2.0603, "step": 13794 }, { "epoch": 1.415948275862069, "grad_norm": 0.09012471139431, "learning_rate": 0.01, "loss": 2.0634, "step": 13797 }, { "epoch": 1.416256157635468, "grad_norm": 0.05895904824137688, "learning_rate": 0.01, "loss": 2.0516, "step": 13800 }, { "epoch": 1.416564039408867, "grad_norm": 0.1610986888408661, "learning_rate": 0.01, "loss": 2.0743, "step": 13803 }, { "epoch": 1.416871921182266, "grad_norm": 0.07852904498577118, "learning_rate": 0.01, "loss": 2.064, "step": 13806 }, { "epoch": 1.417179802955665, "grad_norm": 0.06620481610298157, "learning_rate": 0.01, "loss": 2.0688, "step": 13809 }, { "epoch": 1.417487684729064, "grad_norm": 0.033222537487745285, "learning_rate": 0.01, "loss": 2.0702, "step": 13812 }, { "epoch": 1.417795566502463, "grad_norm": 0.02942623570561409, "learning_rate": 0.01, "loss": 2.0654, "step": 13815 }, { "epoch": 1.418103448275862, "grad_norm": 0.03543059900403023, "learning_rate": 0.01, "loss": 2.0776, "step": 13818 }, { "epoch": 1.4184113300492611, "grad_norm": 0.13414567708969116, "learning_rate": 0.01, "loss": 2.0621, "step": 13821 }, { "epoch": 1.4187192118226601, "grad_norm": 0.06474481523036957, "learning_rate": 0.01, "loss": 2.0675, "step": 13824 }, { "epoch": 1.4190270935960592, "grad_norm": 0.11285994201898575, "learning_rate": 0.01, "loss": 2.0533, "step": 13827 }, { "epoch": 1.4193349753694582, "grad_norm": 0.05104577913880348, "learning_rate": 0.01, "loss": 2.0835, "step": 13830 }, { "epoch": 1.4196428571428572, "grad_norm": 0.05463656783103943, "learning_rate": 0.01, "loss": 2.0788, "step": 13833 }, { "epoch": 1.4199507389162562, "grad_norm": 0.07886187732219696, "learning_rate": 0.01, "loss": 2.0538, "step": 13836 }, { "epoch": 1.4202586206896552, "grad_norm": 0.06960279494524002, "learning_rate": 0.01, "loss": 2.0927, "step": 13839 }, { "epoch": 1.4205665024630543, "grad_norm": 0.07481426745653152, "learning_rate": 0.01, "loss": 2.0421, "step": 13842 }, { "epoch": 1.4208743842364533, "grad_norm": 0.04317006468772888, "learning_rate": 0.01, "loss": 2.0665, "step": 13845 }, { "epoch": 1.4211822660098523, "grad_norm": 0.10644064098596573, "learning_rate": 0.01, "loss": 2.0434, "step": 13848 }, { "epoch": 1.4214901477832513, "grad_norm": 0.09246213734149933, "learning_rate": 0.01, "loss": 2.0487, "step": 13851 }, { "epoch": 1.4217980295566504, "grad_norm": 0.05824518948793411, "learning_rate": 0.01, "loss": 2.0633, "step": 13854 }, { "epoch": 1.4221059113300494, "grad_norm": 0.06316854059696198, "learning_rate": 0.01, "loss": 2.076, "step": 13857 }, { "epoch": 1.4224137931034484, "grad_norm": 0.058339640498161316, "learning_rate": 0.01, "loss": 2.0702, "step": 13860 }, { "epoch": 1.4227216748768474, "grad_norm": 0.05458427220582962, "learning_rate": 0.01, "loss": 2.0585, "step": 13863 }, { "epoch": 1.4230295566502464, "grad_norm": 0.041047900915145874, "learning_rate": 0.01, "loss": 2.0793, "step": 13866 }, { "epoch": 1.4233374384236452, "grad_norm": 0.04485390707850456, "learning_rate": 0.01, "loss": 2.0723, "step": 13869 }, { "epoch": 1.4236453201970443, "grad_norm": 0.13340137898921967, "learning_rate": 0.01, "loss": 2.0555, "step": 13872 }, { "epoch": 1.4239532019704433, "grad_norm": 0.05519254505634308, "learning_rate": 0.01, "loss": 2.0931, "step": 13875 }, { "epoch": 1.4242610837438423, "grad_norm": 0.07989728450775146, "learning_rate": 0.01, "loss": 2.0435, "step": 13878 }, { "epoch": 1.4245689655172413, "grad_norm": 0.09447802603244781, "learning_rate": 0.01, "loss": 2.061, "step": 13881 }, { "epoch": 1.4248768472906403, "grad_norm": 0.05240226909518242, "learning_rate": 0.01, "loss": 2.0643, "step": 13884 }, { "epoch": 1.4251847290640394, "grad_norm": 0.07171013206243515, "learning_rate": 0.01, "loss": 2.0829, "step": 13887 }, { "epoch": 1.4254926108374384, "grad_norm": 0.05098895728588104, "learning_rate": 0.01, "loss": 2.0716, "step": 13890 }, { "epoch": 1.4258004926108374, "grad_norm": 0.08569507300853729, "learning_rate": 0.01, "loss": 2.0699, "step": 13893 }, { "epoch": 1.4261083743842364, "grad_norm": 0.09055166691541672, "learning_rate": 0.01, "loss": 2.0843, "step": 13896 }, { "epoch": 1.4264162561576355, "grad_norm": 0.03242780640721321, "learning_rate": 0.01, "loss": 2.0433, "step": 13899 }, { "epoch": 1.4267241379310345, "grad_norm": 0.04612202197313309, "learning_rate": 0.01, "loss": 2.0327, "step": 13902 }, { "epoch": 1.4270320197044335, "grad_norm": 0.05800663307309151, "learning_rate": 0.01, "loss": 2.0362, "step": 13905 }, { "epoch": 1.4273399014778325, "grad_norm": 0.04150572046637535, "learning_rate": 0.01, "loss": 2.0504, "step": 13908 }, { "epoch": 1.4276477832512315, "grad_norm": 0.08542584627866745, "learning_rate": 0.01, "loss": 2.0406, "step": 13911 }, { "epoch": 1.4279556650246306, "grad_norm": 0.11966803669929504, "learning_rate": 0.01, "loss": 2.0829, "step": 13914 }, { "epoch": 1.4282635467980296, "grad_norm": 0.12066449970006943, "learning_rate": 0.01, "loss": 2.0657, "step": 13917 }, { "epoch": 1.4285714285714286, "grad_norm": 0.04103751480579376, "learning_rate": 0.01, "loss": 2.0496, "step": 13920 }, { "epoch": 1.4288793103448276, "grad_norm": 0.05432034656405449, "learning_rate": 0.01, "loss": 2.0563, "step": 13923 }, { "epoch": 1.4291871921182266, "grad_norm": 0.03935731574892998, "learning_rate": 0.01, "loss": 2.0463, "step": 13926 }, { "epoch": 1.4294950738916257, "grad_norm": 0.1475706547498703, "learning_rate": 0.01, "loss": 2.0641, "step": 13929 }, { "epoch": 1.4298029556650247, "grad_norm": 0.06562622636556625, "learning_rate": 0.01, "loss": 2.0536, "step": 13932 }, { "epoch": 1.4301108374384237, "grad_norm": 0.051726315170526505, "learning_rate": 0.01, "loss": 2.0506, "step": 13935 }, { "epoch": 1.4304187192118227, "grad_norm": 0.0998329371213913, "learning_rate": 0.01, "loss": 2.0521, "step": 13938 }, { "epoch": 1.4307266009852218, "grad_norm": 0.04965333640575409, "learning_rate": 0.01, "loss": 2.0568, "step": 13941 }, { "epoch": 1.4310344827586206, "grad_norm": 0.04430006071925163, "learning_rate": 0.01, "loss": 2.0735, "step": 13944 }, { "epoch": 1.4313423645320196, "grad_norm": 0.05260150134563446, "learning_rate": 0.01, "loss": 2.0887, "step": 13947 }, { "epoch": 1.4316502463054186, "grad_norm": 0.04135138541460037, "learning_rate": 0.01, "loss": 2.055, "step": 13950 }, { "epoch": 1.4319581280788176, "grad_norm": 0.08347123116254807, "learning_rate": 0.01, "loss": 2.0826, "step": 13953 }, { "epoch": 1.4322660098522166, "grad_norm": 0.12328385561704636, "learning_rate": 0.01, "loss": 2.0787, "step": 13956 }, { "epoch": 1.4325738916256157, "grad_norm": 0.05809056758880615, "learning_rate": 0.01, "loss": 2.0798, "step": 13959 }, { "epoch": 1.4328817733990147, "grad_norm": 0.038590408861637115, "learning_rate": 0.01, "loss": 2.0699, "step": 13962 }, { "epoch": 1.4331896551724137, "grad_norm": 0.11158851534128189, "learning_rate": 0.01, "loss": 2.0707, "step": 13965 }, { "epoch": 1.4334975369458127, "grad_norm": 0.0880589634180069, "learning_rate": 0.01, "loss": 2.0425, "step": 13968 }, { "epoch": 1.4338054187192117, "grad_norm": 0.059966232627630234, "learning_rate": 0.01, "loss": 2.0659, "step": 13971 }, { "epoch": 1.4341133004926108, "grad_norm": 0.04661833122372627, "learning_rate": 0.01, "loss": 2.0736, "step": 13974 }, { "epoch": 1.4344211822660098, "grad_norm": 0.04508896544575691, "learning_rate": 0.01, "loss": 2.0856, "step": 13977 }, { "epoch": 1.4347290640394088, "grad_norm": 0.051987554877996445, "learning_rate": 0.01, "loss": 2.0449, "step": 13980 }, { "epoch": 1.4350369458128078, "grad_norm": 0.04814029112458229, "learning_rate": 0.01, "loss": 2.0587, "step": 13983 }, { "epoch": 1.4353448275862069, "grad_norm": 0.09631717205047607, "learning_rate": 0.01, "loss": 2.0747, "step": 13986 }, { "epoch": 1.4356527093596059, "grad_norm": 0.06581971794366837, "learning_rate": 0.01, "loss": 2.0456, "step": 13989 }, { "epoch": 1.435960591133005, "grad_norm": 0.06483247131109238, "learning_rate": 0.01, "loss": 2.0627, "step": 13992 }, { "epoch": 1.436268472906404, "grad_norm": 0.1000155657529831, "learning_rate": 0.01, "loss": 2.0655, "step": 13995 }, { "epoch": 1.436576354679803, "grad_norm": 0.07297492027282715, "learning_rate": 0.01, "loss": 2.0686, "step": 13998 }, { "epoch": 1.436884236453202, "grad_norm": 0.054907578974962234, "learning_rate": 0.01, "loss": 2.0351, "step": 14001 }, { "epoch": 1.437192118226601, "grad_norm": 0.051127828657627106, "learning_rate": 0.01, "loss": 2.0583, "step": 14004 }, { "epoch": 1.4375, "grad_norm": 0.04157300665974617, "learning_rate": 0.01, "loss": 2.0548, "step": 14007 }, { "epoch": 1.437807881773399, "grad_norm": 0.07996746897697449, "learning_rate": 0.01, "loss": 2.0625, "step": 14010 }, { "epoch": 1.438115763546798, "grad_norm": 0.0764036774635315, "learning_rate": 0.01, "loss": 2.076, "step": 14013 }, { "epoch": 1.438423645320197, "grad_norm": 0.10736891627311707, "learning_rate": 0.01, "loss": 2.0682, "step": 14016 }, { "epoch": 1.438731527093596, "grad_norm": 0.0598980113863945, "learning_rate": 0.01, "loss": 2.0774, "step": 14019 }, { "epoch": 1.439039408866995, "grad_norm": 0.10858605802059174, "learning_rate": 0.01, "loss": 2.0708, "step": 14022 }, { "epoch": 1.4393472906403941, "grad_norm": 0.03999786823987961, "learning_rate": 0.01, "loss": 2.0521, "step": 14025 }, { "epoch": 1.4396551724137931, "grad_norm": 0.053138673305511475, "learning_rate": 0.01, "loss": 2.0547, "step": 14028 }, { "epoch": 1.4399630541871922, "grad_norm": 0.06477091461420059, "learning_rate": 0.01, "loss": 2.0371, "step": 14031 }, { "epoch": 1.4402709359605912, "grad_norm": 0.035987384617328644, "learning_rate": 0.01, "loss": 2.0897, "step": 14034 }, { "epoch": 1.4405788177339902, "grad_norm": 0.06938667595386505, "learning_rate": 0.01, "loss": 2.0895, "step": 14037 }, { "epoch": 1.4408866995073892, "grad_norm": 0.041746124625205994, "learning_rate": 0.01, "loss": 2.0705, "step": 14040 }, { "epoch": 1.4411945812807883, "grad_norm": 0.04503123462200165, "learning_rate": 0.01, "loss": 2.0528, "step": 14043 }, { "epoch": 1.4415024630541873, "grad_norm": 0.04153100401163101, "learning_rate": 0.01, "loss": 2.0596, "step": 14046 }, { "epoch": 1.4418103448275863, "grad_norm": 0.04992615804076195, "learning_rate": 0.01, "loss": 2.0527, "step": 14049 }, { "epoch": 1.4421182266009853, "grad_norm": 0.0718725174665451, "learning_rate": 0.01, "loss": 2.049, "step": 14052 }, { "epoch": 1.4424261083743843, "grad_norm": 0.13080431520938873, "learning_rate": 0.01, "loss": 2.0457, "step": 14055 }, { "epoch": 1.4427339901477834, "grad_norm": 0.04203762486577034, "learning_rate": 0.01, "loss": 2.0713, "step": 14058 }, { "epoch": 1.4430418719211824, "grad_norm": 0.04111120104789734, "learning_rate": 0.01, "loss": 2.0845, "step": 14061 }, { "epoch": 1.4433497536945814, "grad_norm": 0.044398125261068344, "learning_rate": 0.01, "loss": 2.0679, "step": 14064 }, { "epoch": 1.4436576354679804, "grad_norm": 0.031682152301073074, "learning_rate": 0.01, "loss": 2.0855, "step": 14067 }, { "epoch": 1.4439655172413794, "grad_norm": 0.07848865538835526, "learning_rate": 0.01, "loss": 2.0607, "step": 14070 }, { "epoch": 1.4442733990147782, "grad_norm": 0.08814079314470291, "learning_rate": 0.01, "loss": 2.0468, "step": 14073 }, { "epoch": 1.4445812807881773, "grad_norm": 0.05344429612159729, "learning_rate": 0.01, "loss": 2.0499, "step": 14076 }, { "epoch": 1.4448891625615763, "grad_norm": 0.05509471520781517, "learning_rate": 0.01, "loss": 2.0662, "step": 14079 }, { "epoch": 1.4451970443349753, "grad_norm": 0.08177798241376877, "learning_rate": 0.01, "loss": 2.0509, "step": 14082 }, { "epoch": 1.4455049261083743, "grad_norm": 0.07953787595033646, "learning_rate": 0.01, "loss": 2.0501, "step": 14085 }, { "epoch": 1.4458128078817734, "grad_norm": 0.06984551250934601, "learning_rate": 0.01, "loss": 2.0599, "step": 14088 }, { "epoch": 1.4461206896551724, "grad_norm": 0.07923319190740585, "learning_rate": 0.01, "loss": 2.06, "step": 14091 }, { "epoch": 1.4464285714285714, "grad_norm": 0.04370349645614624, "learning_rate": 0.01, "loss": 2.0839, "step": 14094 }, { "epoch": 1.4467364532019704, "grad_norm": 0.045787643641233444, "learning_rate": 0.01, "loss": 2.0512, "step": 14097 }, { "epoch": 1.4470443349753694, "grad_norm": 0.04126288741827011, "learning_rate": 0.01, "loss": 2.0603, "step": 14100 }, { "epoch": 1.4473522167487685, "grad_norm": 0.039805784821510315, "learning_rate": 0.01, "loss": 2.0561, "step": 14103 }, { "epoch": 1.4476600985221675, "grad_norm": 0.038430992513895035, "learning_rate": 0.01, "loss": 2.0697, "step": 14106 }, { "epoch": 1.4479679802955665, "grad_norm": 0.07664498686790466, "learning_rate": 0.01, "loss": 2.0381, "step": 14109 }, { "epoch": 1.4482758620689655, "grad_norm": 0.04592788219451904, "learning_rate": 0.01, "loss": 2.042, "step": 14112 }, { "epoch": 1.4485837438423645, "grad_norm": 0.06161922961473465, "learning_rate": 0.01, "loss": 2.0446, "step": 14115 }, { "epoch": 1.4488916256157636, "grad_norm": 0.07906373590230942, "learning_rate": 0.01, "loss": 2.0758, "step": 14118 }, { "epoch": 1.4491995073891626, "grad_norm": 0.09529503434896469, "learning_rate": 0.01, "loss": 2.0512, "step": 14121 }, { "epoch": 1.4495073891625616, "grad_norm": 0.05416659638285637, "learning_rate": 0.01, "loss": 2.0484, "step": 14124 }, { "epoch": 1.4498152709359606, "grad_norm": 0.07085006684064865, "learning_rate": 0.01, "loss": 2.0293, "step": 14127 }, { "epoch": 1.4501231527093597, "grad_norm": 0.07722880691289902, "learning_rate": 0.01, "loss": 2.0578, "step": 14130 }, { "epoch": 1.4504310344827587, "grad_norm": 0.06599342823028564, "learning_rate": 0.01, "loss": 2.0556, "step": 14133 }, { "epoch": 1.4507389162561577, "grad_norm": 0.11217498779296875, "learning_rate": 0.01, "loss": 2.068, "step": 14136 }, { "epoch": 1.4510467980295567, "grad_norm": 0.13082845509052277, "learning_rate": 0.01, "loss": 2.0677, "step": 14139 }, { "epoch": 1.4513546798029557, "grad_norm": 0.06812801957130432, "learning_rate": 0.01, "loss": 2.0668, "step": 14142 }, { "epoch": 1.4516625615763548, "grad_norm": 0.043554674834012985, "learning_rate": 0.01, "loss": 2.0537, "step": 14145 }, { "epoch": 1.4519704433497536, "grad_norm": 0.04550860822200775, "learning_rate": 0.01, "loss": 2.0415, "step": 14148 }, { "epoch": 1.4522783251231526, "grad_norm": 0.04838492348790169, "learning_rate": 0.01, "loss": 2.0642, "step": 14151 }, { "epoch": 1.4525862068965516, "grad_norm": 0.08433537930250168, "learning_rate": 0.01, "loss": 2.0549, "step": 14154 }, { "epoch": 1.4528940886699506, "grad_norm": 0.06986009329557419, "learning_rate": 0.01, "loss": 2.057, "step": 14157 }, { "epoch": 1.4532019704433496, "grad_norm": 0.06860263645648956, "learning_rate": 0.01, "loss": 2.0581, "step": 14160 }, { "epoch": 1.4535098522167487, "grad_norm": 0.06218327581882477, "learning_rate": 0.01, "loss": 2.0482, "step": 14163 }, { "epoch": 1.4538177339901477, "grad_norm": 0.10177832096815109, "learning_rate": 0.01, "loss": 2.059, "step": 14166 }, { "epoch": 1.4541256157635467, "grad_norm": 0.047695957124233246, "learning_rate": 0.01, "loss": 2.0372, "step": 14169 }, { "epoch": 1.4544334975369457, "grad_norm": 0.09761510044336319, "learning_rate": 0.01, "loss": 2.0671, "step": 14172 }, { "epoch": 1.4547413793103448, "grad_norm": 0.050296783447265625, "learning_rate": 0.01, "loss": 2.0536, "step": 14175 }, { "epoch": 1.4550492610837438, "grad_norm": 0.13070064783096313, "learning_rate": 0.01, "loss": 2.0579, "step": 14178 }, { "epoch": 1.4553571428571428, "grad_norm": 0.1080620214343071, "learning_rate": 0.01, "loss": 2.0455, "step": 14181 }, { "epoch": 1.4556650246305418, "grad_norm": 0.06132792308926582, "learning_rate": 0.01, "loss": 2.062, "step": 14184 }, { "epoch": 1.4559729064039408, "grad_norm": 0.07258635014295578, "learning_rate": 0.01, "loss": 2.0666, "step": 14187 }, { "epoch": 1.4562807881773399, "grad_norm": 0.05423443764448166, "learning_rate": 0.01, "loss": 2.0676, "step": 14190 }, { "epoch": 1.4565886699507389, "grad_norm": 0.06038088724017143, "learning_rate": 0.01, "loss": 2.0539, "step": 14193 }, { "epoch": 1.456896551724138, "grad_norm": 0.043958742171525955, "learning_rate": 0.01, "loss": 2.0739, "step": 14196 }, { "epoch": 1.457204433497537, "grad_norm": 0.03985238075256348, "learning_rate": 0.01, "loss": 2.0323, "step": 14199 }, { "epoch": 1.457512315270936, "grad_norm": 0.05626663193106651, "learning_rate": 0.01, "loss": 2.0518, "step": 14202 }, { "epoch": 1.457820197044335, "grad_norm": 0.06365952640771866, "learning_rate": 0.01, "loss": 2.042, "step": 14205 }, { "epoch": 1.458128078817734, "grad_norm": 0.0698857232928276, "learning_rate": 0.01, "loss": 2.0676, "step": 14208 }, { "epoch": 1.458435960591133, "grad_norm": 0.08149702101945877, "learning_rate": 0.01, "loss": 2.0659, "step": 14211 }, { "epoch": 1.458743842364532, "grad_norm": 0.055818330496549606, "learning_rate": 0.01, "loss": 2.0633, "step": 14214 }, { "epoch": 1.459051724137931, "grad_norm": 0.046251330524683, "learning_rate": 0.01, "loss": 2.0598, "step": 14217 }, { "epoch": 1.45935960591133, "grad_norm": 0.10986322909593582, "learning_rate": 0.01, "loss": 2.0768, "step": 14220 }, { "epoch": 1.459667487684729, "grad_norm": 0.06735626608133316, "learning_rate": 0.01, "loss": 2.0709, "step": 14223 }, { "epoch": 1.4599753694581281, "grad_norm": 0.1051633432507515, "learning_rate": 0.01, "loss": 2.0807, "step": 14226 }, { "epoch": 1.4602832512315271, "grad_norm": 0.0956743136048317, "learning_rate": 0.01, "loss": 2.0377, "step": 14229 }, { "epoch": 1.4605911330049262, "grad_norm": 0.04349840059876442, "learning_rate": 0.01, "loss": 2.0723, "step": 14232 }, { "epoch": 1.4608990147783252, "grad_norm": 0.0388668030500412, "learning_rate": 0.01, "loss": 2.0612, "step": 14235 }, { "epoch": 1.4612068965517242, "grad_norm": 0.04311763867735863, "learning_rate": 0.01, "loss": 2.0553, "step": 14238 }, { "epoch": 1.4615147783251232, "grad_norm": 0.09116464853286743, "learning_rate": 0.01, "loss": 2.0537, "step": 14241 }, { "epoch": 1.4618226600985222, "grad_norm": 0.08582088351249695, "learning_rate": 0.01, "loss": 2.0588, "step": 14244 }, { "epoch": 1.4621305418719213, "grad_norm": 0.0435602031648159, "learning_rate": 0.01, "loss": 2.0303, "step": 14247 }, { "epoch": 1.4624384236453203, "grad_norm": 0.067762091755867, "learning_rate": 0.01, "loss": 2.0453, "step": 14250 }, { "epoch": 1.4627463054187193, "grad_norm": 0.03980677202343941, "learning_rate": 0.01, "loss": 2.0488, "step": 14253 }, { "epoch": 1.4630541871921183, "grad_norm": 0.08521614968776703, "learning_rate": 0.01, "loss": 2.0617, "step": 14256 }, { "epoch": 1.4633620689655173, "grad_norm": 0.06770948320627213, "learning_rate": 0.01, "loss": 2.0535, "step": 14259 }, { "epoch": 1.4636699507389164, "grad_norm": 0.053458839654922485, "learning_rate": 0.01, "loss": 2.0594, "step": 14262 }, { "epoch": 1.4639778325123154, "grad_norm": 0.06733859330415726, "learning_rate": 0.01, "loss": 2.0422, "step": 14265 }, { "epoch": 1.4642857142857144, "grad_norm": 0.08033892512321472, "learning_rate": 0.01, "loss": 2.0685, "step": 14268 }, { "epoch": 1.4645935960591134, "grad_norm": 0.0832366794347763, "learning_rate": 0.01, "loss": 2.0867, "step": 14271 }, { "epoch": 1.4649014778325122, "grad_norm": 0.055291153490543365, "learning_rate": 0.01, "loss": 2.0618, "step": 14274 }, { "epoch": 1.4652093596059113, "grad_norm": 0.07180801033973694, "learning_rate": 0.01, "loss": 2.0569, "step": 14277 }, { "epoch": 1.4655172413793103, "grad_norm": 0.048950713127851486, "learning_rate": 0.01, "loss": 2.0433, "step": 14280 }, { "epoch": 1.4658251231527093, "grad_norm": 0.05428025498986244, "learning_rate": 0.01, "loss": 2.052, "step": 14283 }, { "epoch": 1.4661330049261083, "grad_norm": 0.06631309539079666, "learning_rate": 0.01, "loss": 2.0615, "step": 14286 }, { "epoch": 1.4664408866995073, "grad_norm": 0.06743253022432327, "learning_rate": 0.01, "loss": 2.0524, "step": 14289 }, { "epoch": 1.4667487684729064, "grad_norm": 0.10901882499456406, "learning_rate": 0.01, "loss": 2.076, "step": 14292 }, { "epoch": 1.4670566502463054, "grad_norm": 0.08234187960624695, "learning_rate": 0.01, "loss": 2.0432, "step": 14295 }, { "epoch": 1.4673645320197044, "grad_norm": 0.07249965518712997, "learning_rate": 0.01, "loss": 2.057, "step": 14298 }, { "epoch": 1.4676724137931034, "grad_norm": 0.0705137550830841, "learning_rate": 0.01, "loss": 2.0616, "step": 14301 }, { "epoch": 1.4679802955665024, "grad_norm": 0.10730472952127457, "learning_rate": 0.01, "loss": 2.0575, "step": 14304 }, { "epoch": 1.4682881773399015, "grad_norm": 0.048364557325839996, "learning_rate": 0.01, "loss": 2.0719, "step": 14307 }, { "epoch": 1.4685960591133005, "grad_norm": 0.03604978322982788, "learning_rate": 0.01, "loss": 2.0608, "step": 14310 }, { "epoch": 1.4689039408866995, "grad_norm": 0.09732489287853241, "learning_rate": 0.01, "loss": 2.0627, "step": 14313 }, { "epoch": 1.4692118226600985, "grad_norm": 0.06590714305639267, "learning_rate": 0.01, "loss": 2.0775, "step": 14316 }, { "epoch": 1.4695197044334976, "grad_norm": 0.075086310505867, "learning_rate": 0.01, "loss": 2.0633, "step": 14319 }, { "epoch": 1.4698275862068966, "grad_norm": 0.10288450121879578, "learning_rate": 0.01, "loss": 2.0607, "step": 14322 }, { "epoch": 1.4701354679802956, "grad_norm": 0.0535271093249321, "learning_rate": 0.01, "loss": 2.0702, "step": 14325 }, { "epoch": 1.4704433497536946, "grad_norm": 0.04609391465783119, "learning_rate": 0.01, "loss": 2.049, "step": 14328 }, { "epoch": 1.4707512315270936, "grad_norm": 0.044252909719944, "learning_rate": 0.01, "loss": 2.0586, "step": 14331 }, { "epoch": 1.4710591133004927, "grad_norm": 0.07837995141744614, "learning_rate": 0.01, "loss": 2.0541, "step": 14334 }, { "epoch": 1.4713669950738917, "grad_norm": 0.06548511236906052, "learning_rate": 0.01, "loss": 2.0575, "step": 14337 }, { "epoch": 1.4716748768472907, "grad_norm": 0.09237763285636902, "learning_rate": 0.01, "loss": 2.0607, "step": 14340 }, { "epoch": 1.4719827586206897, "grad_norm": 0.04163452237844467, "learning_rate": 0.01, "loss": 2.0604, "step": 14343 }, { "epoch": 1.4722906403940887, "grad_norm": 0.13814504444599152, "learning_rate": 0.01, "loss": 2.0667, "step": 14346 }, { "epoch": 1.4725985221674878, "grad_norm": 0.054490260779857635, "learning_rate": 0.01, "loss": 2.0754, "step": 14349 }, { "epoch": 1.4729064039408866, "grad_norm": 0.07470995932817459, "learning_rate": 0.01, "loss": 2.0731, "step": 14352 }, { "epoch": 1.4732142857142856, "grad_norm": 0.12089511752128601, "learning_rate": 0.01, "loss": 2.0671, "step": 14355 }, { "epoch": 1.4735221674876846, "grad_norm": 0.05507595092058182, "learning_rate": 0.01, "loss": 2.0278, "step": 14358 }, { "epoch": 1.4738300492610836, "grad_norm": 0.05130109563469887, "learning_rate": 0.01, "loss": 2.0491, "step": 14361 }, { "epoch": 1.4741379310344827, "grad_norm": 0.04399503022432327, "learning_rate": 0.01, "loss": 2.0669, "step": 14364 }, { "epoch": 1.4744458128078817, "grad_norm": 0.06161755695939064, "learning_rate": 0.01, "loss": 2.0458, "step": 14367 }, { "epoch": 1.4747536945812807, "grad_norm": 0.045603156089782715, "learning_rate": 0.01, "loss": 2.0477, "step": 14370 }, { "epoch": 1.4750615763546797, "grad_norm": 0.06444186717271805, "learning_rate": 0.01, "loss": 2.0514, "step": 14373 }, { "epoch": 1.4753694581280787, "grad_norm": 0.07450753450393677, "learning_rate": 0.01, "loss": 2.0327, "step": 14376 }, { "epoch": 1.4756773399014778, "grad_norm": 0.10367168486118317, "learning_rate": 0.01, "loss": 2.0453, "step": 14379 }, { "epoch": 1.4759852216748768, "grad_norm": 0.08999089151620865, "learning_rate": 0.01, "loss": 2.0588, "step": 14382 }, { "epoch": 1.4762931034482758, "grad_norm": 0.09803617745637894, "learning_rate": 0.01, "loss": 2.0972, "step": 14385 }, { "epoch": 1.4766009852216748, "grad_norm": 0.042447153478860855, "learning_rate": 0.01, "loss": 2.0295, "step": 14388 }, { "epoch": 1.4769088669950738, "grad_norm": 0.04479740187525749, "learning_rate": 0.01, "loss": 2.0545, "step": 14391 }, { "epoch": 1.4772167487684729, "grad_norm": 0.04113270714879036, "learning_rate": 0.01, "loss": 2.0522, "step": 14394 }, { "epoch": 1.4775246305418719, "grad_norm": 0.1087644025683403, "learning_rate": 0.01, "loss": 2.0668, "step": 14397 }, { "epoch": 1.477832512315271, "grad_norm": 0.05737099424004555, "learning_rate": 0.01, "loss": 2.0509, "step": 14400 }, { "epoch": 1.47814039408867, "grad_norm": 0.11025606095790863, "learning_rate": 0.01, "loss": 2.067, "step": 14403 }, { "epoch": 1.478448275862069, "grad_norm": 0.06662195175886154, "learning_rate": 0.01, "loss": 2.0554, "step": 14406 }, { "epoch": 1.478756157635468, "grad_norm": 0.05261904001235962, "learning_rate": 0.01, "loss": 2.0562, "step": 14409 }, { "epoch": 1.479064039408867, "grad_norm": 0.048272691667079926, "learning_rate": 0.01, "loss": 2.0858, "step": 14412 }, { "epoch": 1.479371921182266, "grad_norm": 0.048300545662641525, "learning_rate": 0.01, "loss": 2.0736, "step": 14415 }, { "epoch": 1.479679802955665, "grad_norm": 0.08697368204593658, "learning_rate": 0.01, "loss": 2.0425, "step": 14418 }, { "epoch": 1.479987684729064, "grad_norm": 0.07993713021278381, "learning_rate": 0.01, "loss": 2.0511, "step": 14421 }, { "epoch": 1.480295566502463, "grad_norm": 0.10037390887737274, "learning_rate": 0.01, "loss": 2.0584, "step": 14424 }, { "epoch": 1.480603448275862, "grad_norm": 0.06048484891653061, "learning_rate": 0.01, "loss": 2.0589, "step": 14427 }, { "epoch": 1.4809113300492611, "grad_norm": 0.08982612937688828, "learning_rate": 0.01, "loss": 2.0575, "step": 14430 }, { "epoch": 1.4812192118226601, "grad_norm": 0.06678975373506546, "learning_rate": 0.01, "loss": 2.0544, "step": 14433 }, { "epoch": 1.4815270935960592, "grad_norm": 0.07890944927930832, "learning_rate": 0.01, "loss": 2.0352, "step": 14436 }, { "epoch": 1.4818349753694582, "grad_norm": 0.05838685482740402, "learning_rate": 0.01, "loss": 2.0481, "step": 14439 }, { "epoch": 1.4821428571428572, "grad_norm": 0.06483394652605057, "learning_rate": 0.01, "loss": 2.0425, "step": 14442 }, { "epoch": 1.4824507389162562, "grad_norm": 0.07320713996887207, "learning_rate": 0.01, "loss": 2.0524, "step": 14445 }, { "epoch": 1.4827586206896552, "grad_norm": 0.07484092563390732, "learning_rate": 0.01, "loss": 2.0709, "step": 14448 }, { "epoch": 1.4830665024630543, "grad_norm": 0.07702804356813431, "learning_rate": 0.01, "loss": 2.0483, "step": 14451 }, { "epoch": 1.4833743842364533, "grad_norm": 0.05470692366361618, "learning_rate": 0.01, "loss": 2.0767, "step": 14454 }, { "epoch": 1.4836822660098523, "grad_norm": 0.055773910135030746, "learning_rate": 0.01, "loss": 2.0692, "step": 14457 }, { "epoch": 1.4839901477832513, "grad_norm": 0.03712743893265724, "learning_rate": 0.01, "loss": 2.0723, "step": 14460 }, { "epoch": 1.4842980295566504, "grad_norm": 0.035963475704193115, "learning_rate": 0.01, "loss": 2.0526, "step": 14463 }, { "epoch": 1.4846059113300494, "grad_norm": 0.08578921854496002, "learning_rate": 0.01, "loss": 2.0631, "step": 14466 }, { "epoch": 1.4849137931034484, "grad_norm": 0.08239159733057022, "learning_rate": 0.01, "loss": 2.0654, "step": 14469 }, { "epoch": 1.4852216748768474, "grad_norm": 0.05097891017794609, "learning_rate": 0.01, "loss": 2.0837, "step": 14472 }, { "epoch": 1.4855295566502464, "grad_norm": 0.051847904920578, "learning_rate": 0.01, "loss": 2.059, "step": 14475 }, { "epoch": 1.4858374384236452, "grad_norm": 0.04754810780286789, "learning_rate": 0.01, "loss": 2.0658, "step": 14478 }, { "epoch": 1.4861453201970443, "grad_norm": 0.046647075563669205, "learning_rate": 0.01, "loss": 2.0423, "step": 14481 }, { "epoch": 1.4864532019704433, "grad_norm": 0.06013277545571327, "learning_rate": 0.01, "loss": 2.0254, "step": 14484 }, { "epoch": 1.4867610837438423, "grad_norm": 0.13433513045310974, "learning_rate": 0.01, "loss": 2.0775, "step": 14487 }, { "epoch": 1.4870689655172413, "grad_norm": 0.046518564224243164, "learning_rate": 0.01, "loss": 2.0434, "step": 14490 }, { "epoch": 1.4873768472906403, "grad_norm": 0.09483514726161957, "learning_rate": 0.01, "loss": 2.0839, "step": 14493 }, { "epoch": 1.4876847290640394, "grad_norm": 0.07147302478551865, "learning_rate": 0.01, "loss": 2.0741, "step": 14496 }, { "epoch": 1.4879926108374384, "grad_norm": 0.12423846870660782, "learning_rate": 0.01, "loss": 2.045, "step": 14499 }, { "epoch": 1.4883004926108374, "grad_norm": 0.07726770639419556, "learning_rate": 0.01, "loss": 2.0578, "step": 14502 }, { "epoch": 1.4886083743842364, "grad_norm": 0.059802982956171036, "learning_rate": 0.01, "loss": 2.0526, "step": 14505 }, { "epoch": 1.4889162561576355, "grad_norm": 0.050745993852615356, "learning_rate": 0.01, "loss": 2.049, "step": 14508 }, { "epoch": 1.4892241379310345, "grad_norm": 0.052064161747694016, "learning_rate": 0.01, "loss": 2.0575, "step": 14511 }, { "epoch": 1.4895320197044335, "grad_norm": 0.06646674871444702, "learning_rate": 0.01, "loss": 2.0689, "step": 14514 }, { "epoch": 1.4898399014778325, "grad_norm": 0.043484605848789215, "learning_rate": 0.01, "loss": 2.0655, "step": 14517 }, { "epoch": 1.4901477832512315, "grad_norm": 0.14452145993709564, "learning_rate": 0.01, "loss": 2.0599, "step": 14520 }, { "epoch": 1.4904556650246306, "grad_norm": 0.08289093524217606, "learning_rate": 0.01, "loss": 2.0654, "step": 14523 }, { "epoch": 1.4907635467980296, "grad_norm": 0.05047908052802086, "learning_rate": 0.01, "loss": 2.0409, "step": 14526 }, { "epoch": 1.4910714285714286, "grad_norm": 0.04830252006649971, "learning_rate": 0.01, "loss": 2.0529, "step": 14529 }, { "epoch": 1.4913793103448276, "grad_norm": 0.0430610254406929, "learning_rate": 0.01, "loss": 2.0242, "step": 14532 }, { "epoch": 1.4916871921182266, "grad_norm": 0.04282008111476898, "learning_rate": 0.01, "loss": 2.0494, "step": 14535 }, { "epoch": 1.4919950738916257, "grad_norm": 0.037373676896095276, "learning_rate": 0.01, "loss": 2.0528, "step": 14538 }, { "epoch": 1.4923029556650247, "grad_norm": 0.04186755418777466, "learning_rate": 0.01, "loss": 2.058, "step": 14541 }, { "epoch": 1.4926108374384237, "grad_norm": 0.05514196678996086, "learning_rate": 0.01, "loss": 2.0647, "step": 14544 }, { "epoch": 1.4929187192118227, "grad_norm": 0.07391703873872757, "learning_rate": 0.01, "loss": 2.0812, "step": 14547 }, { "epoch": 1.4932266009852218, "grad_norm": 0.1295444518327713, "learning_rate": 0.01, "loss": 2.0571, "step": 14550 }, { "epoch": 1.4935344827586206, "grad_norm": 0.06389490514993668, "learning_rate": 0.01, "loss": 2.0756, "step": 14553 }, { "epoch": 1.4938423645320196, "grad_norm": 0.09335155785083771, "learning_rate": 0.01, "loss": 2.0904, "step": 14556 }, { "epoch": 1.4941502463054186, "grad_norm": 0.059700366109609604, "learning_rate": 0.01, "loss": 2.0598, "step": 14559 }, { "epoch": 1.4944581280788176, "grad_norm": 0.07785683870315552, "learning_rate": 0.01, "loss": 2.0567, "step": 14562 }, { "epoch": 1.4947660098522166, "grad_norm": 0.11935362964868546, "learning_rate": 0.01, "loss": 2.0536, "step": 14565 }, { "epoch": 1.4950738916256157, "grad_norm": 0.06188122183084488, "learning_rate": 0.01, "loss": 2.0436, "step": 14568 }, { "epoch": 1.4953817733990147, "grad_norm": 0.05302932485938072, "learning_rate": 0.01, "loss": 2.0704, "step": 14571 }, { "epoch": 1.4956896551724137, "grad_norm": 0.03871694207191467, "learning_rate": 0.01, "loss": 2.062, "step": 14574 }, { "epoch": 1.4959975369458127, "grad_norm": 0.03942064568400383, "learning_rate": 0.01, "loss": 2.0725, "step": 14577 }, { "epoch": 1.4963054187192117, "grad_norm": 0.05354088917374611, "learning_rate": 0.01, "loss": 2.054, "step": 14580 }, { "epoch": 1.4966133004926108, "grad_norm": 0.07863521575927734, "learning_rate": 0.01, "loss": 2.0577, "step": 14583 }, { "epoch": 1.4969211822660098, "grad_norm": 0.0440685860812664, "learning_rate": 0.01, "loss": 2.0422, "step": 14586 }, { "epoch": 1.4972290640394088, "grad_norm": 0.0724552571773529, "learning_rate": 0.01, "loss": 2.0707, "step": 14589 }, { "epoch": 1.4975369458128078, "grad_norm": 0.06099352613091469, "learning_rate": 0.01, "loss": 2.0567, "step": 14592 }, { "epoch": 1.4978448275862069, "grad_norm": 0.05534674599766731, "learning_rate": 0.01, "loss": 2.0343, "step": 14595 }, { "epoch": 1.4981527093596059, "grad_norm": 0.07876823097467422, "learning_rate": 0.01, "loss": 2.0686, "step": 14598 }, { "epoch": 1.498460591133005, "grad_norm": 0.07860377430915833, "learning_rate": 0.01, "loss": 2.0506, "step": 14601 }, { "epoch": 1.498768472906404, "grad_norm": 0.054005399346351624, "learning_rate": 0.01, "loss": 2.0429, "step": 14604 }, { "epoch": 1.499076354679803, "grad_norm": 0.10550951957702637, "learning_rate": 0.01, "loss": 2.0407, "step": 14607 }, { "epoch": 1.499384236453202, "grad_norm": 0.056426841765642166, "learning_rate": 0.01, "loss": 2.0589, "step": 14610 }, { "epoch": 1.499692118226601, "grad_norm": 0.09640904515981674, "learning_rate": 0.01, "loss": 2.0556, "step": 14613 }, { "epoch": 1.5, "grad_norm": 0.0822538211941719, "learning_rate": 0.01, "loss": 2.0684, "step": 14616 }, { "epoch": 1.500307881773399, "grad_norm": 0.05105495825409889, "learning_rate": 0.01, "loss": 2.0588, "step": 14619 }, { "epoch": 1.500615763546798, "grad_norm": 0.07851336896419525, "learning_rate": 0.01, "loss": 2.0635, "step": 14622 }, { "epoch": 1.500923645320197, "grad_norm": 0.051046207547187805, "learning_rate": 0.01, "loss": 2.0418, "step": 14625 }, { "epoch": 1.501231527093596, "grad_norm": 0.12335740774869919, "learning_rate": 0.01, "loss": 2.0483, "step": 14628 }, { "epoch": 1.501539408866995, "grad_norm": 0.04044636711478233, "learning_rate": 0.01, "loss": 2.0635, "step": 14631 }, { "epoch": 1.5018472906403941, "grad_norm": 0.0532059408724308, "learning_rate": 0.01, "loss": 2.0567, "step": 14634 }, { "epoch": 1.5021551724137931, "grad_norm": 0.0446847639977932, "learning_rate": 0.01, "loss": 2.0531, "step": 14637 }, { "epoch": 1.5024630541871922, "grad_norm": 0.05464153364300728, "learning_rate": 0.01, "loss": 2.0464, "step": 14640 }, { "epoch": 1.5027709359605912, "grad_norm": 0.08923088759183884, "learning_rate": 0.01, "loss": 2.0787, "step": 14643 }, { "epoch": 1.5030788177339902, "grad_norm": 0.06256496161222458, "learning_rate": 0.01, "loss": 2.0417, "step": 14646 }, { "epoch": 1.5033866995073892, "grad_norm": 0.05338229984045029, "learning_rate": 0.01, "loss": 2.053, "step": 14649 }, { "epoch": 1.5036945812807883, "grad_norm": 0.04416535049676895, "learning_rate": 0.01, "loss": 2.0532, "step": 14652 }, { "epoch": 1.5040024630541873, "grad_norm": 0.07076221704483032, "learning_rate": 0.01, "loss": 2.0677, "step": 14655 }, { "epoch": 1.5043103448275863, "grad_norm": 0.08566464483737946, "learning_rate": 0.01, "loss": 2.0499, "step": 14658 }, { "epoch": 1.5046182266009853, "grad_norm": 0.049552109092473984, "learning_rate": 0.01, "loss": 2.0549, "step": 14661 }, { "epoch": 1.5049261083743843, "grad_norm": 0.11802852898836136, "learning_rate": 0.01, "loss": 2.0657, "step": 14664 }, { "epoch": 1.5052339901477834, "grad_norm": 0.05280107632279396, "learning_rate": 0.01, "loss": 2.0722, "step": 14667 }, { "epoch": 1.5055418719211824, "grad_norm": 0.036458853632211685, "learning_rate": 0.01, "loss": 2.0357, "step": 14670 }, { "epoch": 1.5058497536945814, "grad_norm": 0.0465536043047905, "learning_rate": 0.01, "loss": 2.0765, "step": 14673 }, { "epoch": 1.5061576354679804, "grad_norm": 0.09052444994449615, "learning_rate": 0.01, "loss": 2.0677, "step": 14676 }, { "epoch": 1.5064655172413794, "grad_norm": 0.08750707656145096, "learning_rate": 0.01, "loss": 2.0719, "step": 14679 }, { "epoch": 1.5067733990147785, "grad_norm": 0.07876972109079361, "learning_rate": 0.01, "loss": 2.0539, "step": 14682 }, { "epoch": 1.5070812807881775, "grad_norm": 0.045561011880636215, "learning_rate": 0.01, "loss": 2.0656, "step": 14685 }, { "epoch": 1.5073891625615765, "grad_norm": 0.04548237472772598, "learning_rate": 0.01, "loss": 2.0811, "step": 14688 }, { "epoch": 1.5076970443349755, "grad_norm": 0.04897540062665939, "learning_rate": 0.01, "loss": 2.046, "step": 14691 }, { "epoch": 1.5080049261083743, "grad_norm": 0.08820399641990662, "learning_rate": 0.01, "loss": 2.0521, "step": 14694 }, { "epoch": 1.5083128078817734, "grad_norm": 0.0701432004570961, "learning_rate": 0.01, "loss": 2.0644, "step": 14697 }, { "epoch": 1.5086206896551724, "grad_norm": 0.10921904444694519, "learning_rate": 0.01, "loss": 2.0617, "step": 14700 }, { "epoch": 1.5089285714285714, "grad_norm": 0.08308566361665726, "learning_rate": 0.01, "loss": 2.101, "step": 14703 }, { "epoch": 1.5092364532019704, "grad_norm": 0.12545743584632874, "learning_rate": 0.01, "loss": 2.0495, "step": 14706 }, { "epoch": 1.5095443349753694, "grad_norm": 0.11245466768741608, "learning_rate": 0.01, "loss": 2.0264, "step": 14709 }, { "epoch": 1.5098522167487685, "grad_norm": 0.10128718614578247, "learning_rate": 0.01, "loss": 2.0413, "step": 14712 }, { "epoch": 1.5101600985221675, "grad_norm": 0.07226911187171936, "learning_rate": 0.01, "loss": 2.0487, "step": 14715 }, { "epoch": 1.5104679802955665, "grad_norm": 0.056605782359838486, "learning_rate": 0.01, "loss": 2.0686, "step": 14718 }, { "epoch": 1.5107758620689655, "grad_norm": 0.08795683085918427, "learning_rate": 0.01, "loss": 2.053, "step": 14721 }, { "epoch": 1.5110837438423645, "grad_norm": 0.07311341166496277, "learning_rate": 0.01, "loss": 2.0164, "step": 14724 }, { "epoch": 1.5113916256157636, "grad_norm": 0.07164688408374786, "learning_rate": 0.01, "loss": 2.048, "step": 14727 }, { "epoch": 1.5116995073891626, "grad_norm": 0.04577312618494034, "learning_rate": 0.01, "loss": 2.0523, "step": 14730 }, { "epoch": 1.5120073891625616, "grad_norm": 0.043186552822589874, "learning_rate": 0.01, "loss": 2.0694, "step": 14733 }, { "epoch": 1.5123152709359606, "grad_norm": 0.06101042777299881, "learning_rate": 0.01, "loss": 2.072, "step": 14736 }, { "epoch": 1.5126231527093597, "grad_norm": 0.12651608884334564, "learning_rate": 0.01, "loss": 2.0914, "step": 14739 }, { "epoch": 1.5129310344827587, "grad_norm": 0.039495594799518585, "learning_rate": 0.01, "loss": 2.0331, "step": 14742 }, { "epoch": 1.5132389162561575, "grad_norm": 0.10854054987430573, "learning_rate": 0.01, "loss": 2.0585, "step": 14745 }, { "epoch": 1.5135467980295565, "grad_norm": 0.142778679728508, "learning_rate": 0.01, "loss": 2.0433, "step": 14748 }, { "epoch": 1.5138546798029555, "grad_norm": 0.06473375856876373, "learning_rate": 0.01, "loss": 2.075, "step": 14751 }, { "epoch": 1.5141625615763545, "grad_norm": 0.050436701625585556, "learning_rate": 0.01, "loss": 2.0823, "step": 14754 }, { "epoch": 1.5144704433497536, "grad_norm": 0.057088159024715424, "learning_rate": 0.01, "loss": 2.0578, "step": 14757 }, { "epoch": 1.5147783251231526, "grad_norm": 0.051623161882162094, "learning_rate": 0.01, "loss": 2.0816, "step": 14760 }, { "epoch": 1.5150862068965516, "grad_norm": 0.0770149901509285, "learning_rate": 0.01, "loss": 2.0583, "step": 14763 }, { "epoch": 1.5153940886699506, "grad_norm": 0.06827536970376968, "learning_rate": 0.01, "loss": 2.0782, "step": 14766 }, { "epoch": 1.5157019704433496, "grad_norm": 0.06987358629703522, "learning_rate": 0.01, "loss": 2.0618, "step": 14769 }, { "epoch": 1.5160098522167487, "grad_norm": 0.05388219282031059, "learning_rate": 0.01, "loss": 2.084, "step": 14772 }, { "epoch": 1.5163177339901477, "grad_norm": 0.11866139620542526, "learning_rate": 0.01, "loss": 2.0622, "step": 14775 }, { "epoch": 1.5166256157635467, "grad_norm": 0.12754911184310913, "learning_rate": 0.01, "loss": 2.0387, "step": 14778 }, { "epoch": 1.5169334975369457, "grad_norm": 0.03591502830386162, "learning_rate": 0.01, "loss": 2.043, "step": 14781 }, { "epoch": 1.5172413793103448, "grad_norm": 0.09142038226127625, "learning_rate": 0.01, "loss": 2.05, "step": 14784 }, { "epoch": 1.5175492610837438, "grad_norm": 0.13140954077243805, "learning_rate": 0.01, "loss": 2.0302, "step": 14787 }, { "epoch": 1.5178571428571428, "grad_norm": 0.08330459147691727, "learning_rate": 0.01, "loss": 2.0461, "step": 14790 }, { "epoch": 1.5181650246305418, "grad_norm": 0.0779498815536499, "learning_rate": 0.01, "loss": 2.0662, "step": 14793 }, { "epoch": 1.5184729064039408, "grad_norm": 0.05396762117743492, "learning_rate": 0.01, "loss": 2.0586, "step": 14796 }, { "epoch": 1.5187807881773399, "grad_norm": 0.06744614988565445, "learning_rate": 0.01, "loss": 2.0578, "step": 14799 }, { "epoch": 1.5190886699507389, "grad_norm": 0.04777420684695244, "learning_rate": 0.01, "loss": 2.0645, "step": 14802 }, { "epoch": 1.519396551724138, "grad_norm": 0.044643301516771317, "learning_rate": 0.01, "loss": 2.0691, "step": 14805 }, { "epoch": 1.519704433497537, "grad_norm": 0.05263877660036087, "learning_rate": 0.01, "loss": 2.0475, "step": 14808 }, { "epoch": 1.520012315270936, "grad_norm": 0.07794903963804245, "learning_rate": 0.01, "loss": 2.0685, "step": 14811 }, { "epoch": 1.520320197044335, "grad_norm": 0.03846001625061035, "learning_rate": 0.01, "loss": 2.0547, "step": 14814 }, { "epoch": 1.520628078817734, "grad_norm": 0.03806301951408386, "learning_rate": 0.01, "loss": 2.0701, "step": 14817 }, { "epoch": 1.520935960591133, "grad_norm": 0.08289408683776855, "learning_rate": 0.01, "loss": 2.0712, "step": 14820 }, { "epoch": 1.521243842364532, "grad_norm": 0.04307285323739052, "learning_rate": 0.01, "loss": 2.0926, "step": 14823 }, { "epoch": 1.521551724137931, "grad_norm": 0.04523704573512077, "learning_rate": 0.01, "loss": 2.0613, "step": 14826 }, { "epoch": 1.52185960591133, "grad_norm": 0.0813162624835968, "learning_rate": 0.01, "loss": 2.0516, "step": 14829 }, { "epoch": 1.522167487684729, "grad_norm": 0.08958449214696884, "learning_rate": 0.01, "loss": 2.0534, "step": 14832 }, { "epoch": 1.5224753694581281, "grad_norm": 0.1036042720079422, "learning_rate": 0.01, "loss": 2.051, "step": 14835 }, { "epoch": 1.5227832512315271, "grad_norm": 0.06528764218091965, "learning_rate": 0.01, "loss": 2.0389, "step": 14838 }, { "epoch": 1.5230911330049262, "grad_norm": 0.04857415333390236, "learning_rate": 0.01, "loss": 2.0215, "step": 14841 }, { "epoch": 1.5233990147783252, "grad_norm": 0.11137302964925766, "learning_rate": 0.01, "loss": 2.0706, "step": 14844 }, { "epoch": 1.5237068965517242, "grad_norm": 0.05258537456393242, "learning_rate": 0.01, "loss": 2.053, "step": 14847 }, { "epoch": 1.5240147783251232, "grad_norm": 0.05203690007328987, "learning_rate": 0.01, "loss": 2.0719, "step": 14850 }, { "epoch": 1.5243226600985222, "grad_norm": 0.036557264626026154, "learning_rate": 0.01, "loss": 2.0827, "step": 14853 }, { "epoch": 1.5246305418719213, "grad_norm": 0.05553048849105835, "learning_rate": 0.01, "loss": 2.0526, "step": 14856 }, { "epoch": 1.5249384236453203, "grad_norm": 0.07551626116037369, "learning_rate": 0.01, "loss": 2.0696, "step": 14859 }, { "epoch": 1.5252463054187193, "grad_norm": 0.09335839748382568, "learning_rate": 0.01, "loss": 2.069, "step": 14862 }, { "epoch": 1.5255541871921183, "grad_norm": 0.07123745232820511, "learning_rate": 0.01, "loss": 2.048, "step": 14865 }, { "epoch": 1.5258620689655173, "grad_norm": 0.06792188435792923, "learning_rate": 0.01, "loss": 2.059, "step": 14868 }, { "epoch": 1.5261699507389164, "grad_norm": 0.035666827112436295, "learning_rate": 0.01, "loss": 2.0585, "step": 14871 }, { "epoch": 1.5264778325123154, "grad_norm": 0.039600104093551636, "learning_rate": 0.01, "loss": 2.0447, "step": 14874 }, { "epoch": 1.5267857142857144, "grad_norm": 0.03983796760439873, "learning_rate": 0.01, "loss": 2.0607, "step": 14877 }, { "epoch": 1.5270935960591134, "grad_norm": 0.07013492286205292, "learning_rate": 0.01, "loss": 2.0502, "step": 14880 }, { "epoch": 1.5274014778325125, "grad_norm": 0.07064792513847351, "learning_rate": 0.01, "loss": 2.0328, "step": 14883 }, { "epoch": 1.5277093596059115, "grad_norm": 0.10502810031175613, "learning_rate": 0.01, "loss": 2.0494, "step": 14886 }, { "epoch": 1.5280172413793105, "grad_norm": 0.050288375467061996, "learning_rate": 0.01, "loss": 2.0316, "step": 14889 }, { "epoch": 1.5283251231527095, "grad_norm": 0.07382049411535263, "learning_rate": 0.01, "loss": 2.0414, "step": 14892 }, { "epoch": 1.5286330049261085, "grad_norm": 0.08693026751279831, "learning_rate": 0.01, "loss": 2.0428, "step": 14895 }, { "epoch": 1.5289408866995073, "grad_norm": 0.04283773526549339, "learning_rate": 0.01, "loss": 2.068, "step": 14898 }, { "epoch": 1.5292487684729064, "grad_norm": 0.1044667437672615, "learning_rate": 0.01, "loss": 2.0583, "step": 14901 }, { "epoch": 1.5295566502463054, "grad_norm": 0.06316410005092621, "learning_rate": 0.01, "loss": 2.0372, "step": 14904 }, { "epoch": 1.5298645320197044, "grad_norm": 0.04687780514359474, "learning_rate": 0.01, "loss": 2.0575, "step": 14907 }, { "epoch": 1.5301724137931034, "grad_norm": 0.04785927012562752, "learning_rate": 0.01, "loss": 2.0435, "step": 14910 }, { "epoch": 1.5304802955665024, "grad_norm": 0.03788747265934944, "learning_rate": 0.01, "loss": 2.0483, "step": 14913 }, { "epoch": 1.5307881773399015, "grad_norm": 0.05618858337402344, "learning_rate": 0.01, "loss": 2.031, "step": 14916 }, { "epoch": 1.5310960591133005, "grad_norm": 0.10059016942977905, "learning_rate": 0.01, "loss": 2.0608, "step": 14919 }, { "epoch": 1.5314039408866995, "grad_norm": 0.06718064099550247, "learning_rate": 0.01, "loss": 2.0676, "step": 14922 }, { "epoch": 1.5317118226600985, "grad_norm": 0.09006398916244507, "learning_rate": 0.01, "loss": 2.059, "step": 14925 }, { "epoch": 1.5320197044334976, "grad_norm": 0.036577485501766205, "learning_rate": 0.01, "loss": 2.0425, "step": 14928 }, { "epoch": 1.5323275862068966, "grad_norm": 0.07979925721883774, "learning_rate": 0.01, "loss": 2.0387, "step": 14931 }, { "epoch": 1.5326354679802956, "grad_norm": 0.08286473900079727, "learning_rate": 0.01, "loss": 2.0523, "step": 14934 }, { "epoch": 1.5329433497536946, "grad_norm": 0.042596206068992615, "learning_rate": 0.01, "loss": 2.0692, "step": 14937 }, { "epoch": 1.5332512315270936, "grad_norm": 0.043695200234651566, "learning_rate": 0.01, "loss": 2.0429, "step": 14940 }, { "epoch": 1.5335591133004927, "grad_norm": 0.13041776418685913, "learning_rate": 0.01, "loss": 2.0966, "step": 14943 }, { "epoch": 1.5338669950738915, "grad_norm": 0.06076871603727341, "learning_rate": 0.01, "loss": 2.081, "step": 14946 }, { "epoch": 1.5341748768472905, "grad_norm": 0.08744147419929504, "learning_rate": 0.01, "loss": 2.0698, "step": 14949 }, { "epoch": 1.5344827586206895, "grad_norm": 0.041992440819740295, "learning_rate": 0.01, "loss": 2.0735, "step": 14952 }, { "epoch": 1.5347906403940885, "grad_norm": 0.0773782953619957, "learning_rate": 0.01, "loss": 2.0501, "step": 14955 }, { "epoch": 1.5350985221674875, "grad_norm": 0.04371657967567444, "learning_rate": 0.01, "loss": 2.043, "step": 14958 }, { "epoch": 1.5354064039408866, "grad_norm": 0.04753347858786583, "learning_rate": 0.01, "loss": 2.0399, "step": 14961 }, { "epoch": 1.5357142857142856, "grad_norm": 0.10644037276506424, "learning_rate": 0.01, "loss": 2.0838, "step": 14964 }, { "epoch": 1.5360221674876846, "grad_norm": 0.037067610770463943, "learning_rate": 0.01, "loss": 2.0677, "step": 14967 }, { "epoch": 1.5363300492610836, "grad_norm": 0.06745267659425735, "learning_rate": 0.01, "loss": 2.0464, "step": 14970 }, { "epoch": 1.5366379310344827, "grad_norm": 0.10053039342164993, "learning_rate": 0.01, "loss": 2.0696, "step": 14973 }, { "epoch": 1.5369458128078817, "grad_norm": 0.08785562962293625, "learning_rate": 0.01, "loss": 2.059, "step": 14976 }, { "epoch": 1.5372536945812807, "grad_norm": 0.12240536510944366, "learning_rate": 0.01, "loss": 2.0477, "step": 14979 }, { "epoch": 1.5375615763546797, "grad_norm": 0.08541588485240936, "learning_rate": 0.01, "loss": 2.0581, "step": 14982 }, { "epoch": 1.5378694581280787, "grad_norm": 0.0469081737101078, "learning_rate": 0.01, "loss": 2.0581, "step": 14985 }, { "epoch": 1.5381773399014778, "grad_norm": 0.04396476596593857, "learning_rate": 0.01, "loss": 2.0504, "step": 14988 }, { "epoch": 1.5384852216748768, "grad_norm": 0.033920448273420334, "learning_rate": 0.01, "loss": 2.0513, "step": 14991 }, { "epoch": 1.5387931034482758, "grad_norm": 0.035798102617263794, "learning_rate": 0.01, "loss": 2.0583, "step": 14994 }, { "epoch": 1.5391009852216748, "grad_norm": 0.030788132920861244, "learning_rate": 0.01, "loss": 2.0856, "step": 14997 }, { "epoch": 1.5394088669950738, "grad_norm": 0.06127138063311577, "learning_rate": 0.01, "loss": 2.0485, "step": 15000 }, { "epoch": 1.5397167487684729, "grad_norm": 0.07792042940855026, "learning_rate": 0.01, "loss": 2.0473, "step": 15003 }, { "epoch": 1.5400246305418719, "grad_norm": 0.1915716826915741, "learning_rate": 0.01, "loss": 2.06, "step": 15006 }, { "epoch": 1.540332512315271, "grad_norm": 0.13130734860897064, "learning_rate": 0.01, "loss": 2.0458, "step": 15009 }, { "epoch": 1.54064039408867, "grad_norm": 0.08750183880329132, "learning_rate": 0.01, "loss": 2.0569, "step": 15012 }, { "epoch": 1.540948275862069, "grad_norm": 0.0678631141781807, "learning_rate": 0.01, "loss": 2.0256, "step": 15015 }, { "epoch": 1.541256157635468, "grad_norm": 0.04352593049407005, "learning_rate": 0.01, "loss": 2.0541, "step": 15018 }, { "epoch": 1.541564039408867, "grad_norm": 0.059398628771305084, "learning_rate": 0.01, "loss": 2.0542, "step": 15021 }, { "epoch": 1.541871921182266, "grad_norm": 0.09179355949163437, "learning_rate": 0.01, "loss": 2.0623, "step": 15024 }, { "epoch": 1.542179802955665, "grad_norm": 0.08243024349212646, "learning_rate": 0.01, "loss": 2.0616, "step": 15027 }, { "epoch": 1.542487684729064, "grad_norm": 0.05103360861539841, "learning_rate": 0.01, "loss": 2.0498, "step": 15030 }, { "epoch": 1.542795566502463, "grad_norm": 0.04206395894289017, "learning_rate": 0.01, "loss": 2.0676, "step": 15033 }, { "epoch": 1.543103448275862, "grad_norm": 0.03659799322485924, "learning_rate": 0.01, "loss": 2.0376, "step": 15036 }, { "epoch": 1.5434113300492611, "grad_norm": 0.1279965043067932, "learning_rate": 0.01, "loss": 2.043, "step": 15039 }, { "epoch": 1.5437192118226601, "grad_norm": 0.09509512782096863, "learning_rate": 0.01, "loss": 2.0233, "step": 15042 }, { "epoch": 1.5440270935960592, "grad_norm": 0.07963217794895172, "learning_rate": 0.01, "loss": 2.0632, "step": 15045 }, { "epoch": 1.5443349753694582, "grad_norm": 0.06425557285547256, "learning_rate": 0.01, "loss": 2.0454, "step": 15048 }, { "epoch": 1.5446428571428572, "grad_norm": 0.1166144609451294, "learning_rate": 0.01, "loss": 2.0675, "step": 15051 }, { "epoch": 1.5449507389162562, "grad_norm": 0.0558270663022995, "learning_rate": 0.01, "loss": 2.0495, "step": 15054 }, { "epoch": 1.5452586206896552, "grad_norm": 0.05666494369506836, "learning_rate": 0.01, "loss": 2.0417, "step": 15057 }, { "epoch": 1.5455665024630543, "grad_norm": 0.048931702971458435, "learning_rate": 0.01, "loss": 2.0503, "step": 15060 }, { "epoch": 1.5458743842364533, "grad_norm": 0.10072410106658936, "learning_rate": 0.01, "loss": 2.0432, "step": 15063 }, { "epoch": 1.5461822660098523, "grad_norm": 0.06339754164218903, "learning_rate": 0.01, "loss": 2.048, "step": 15066 }, { "epoch": 1.5464901477832513, "grad_norm": 0.04913650080561638, "learning_rate": 0.01, "loss": 2.07, "step": 15069 }, { "epoch": 1.5467980295566504, "grad_norm": 0.1012924313545227, "learning_rate": 0.01, "loss": 2.0423, "step": 15072 }, { "epoch": 1.5471059113300494, "grad_norm": 0.048015668988227844, "learning_rate": 0.01, "loss": 2.0356, "step": 15075 }, { "epoch": 1.5474137931034484, "grad_norm": 0.09666754305362701, "learning_rate": 0.01, "loss": 2.0701, "step": 15078 }, { "epoch": 1.5477216748768474, "grad_norm": 0.07722094655036926, "learning_rate": 0.01, "loss": 2.0223, "step": 15081 }, { "epoch": 1.5480295566502464, "grad_norm": 0.06525082141160965, "learning_rate": 0.01, "loss": 2.056, "step": 15084 }, { "epoch": 1.5483374384236455, "grad_norm": 0.04979628697037697, "learning_rate": 0.01, "loss": 2.0662, "step": 15087 }, { "epoch": 1.5486453201970445, "grad_norm": 0.05903888866305351, "learning_rate": 0.01, "loss": 2.0551, "step": 15090 }, { "epoch": 1.5489532019704435, "grad_norm": 0.09098793566226959, "learning_rate": 0.01, "loss": 2.0758, "step": 15093 }, { "epoch": 1.5492610837438425, "grad_norm": 0.08262350410223007, "learning_rate": 0.01, "loss": 2.0548, "step": 15096 }, { "epoch": 1.5495689655172413, "grad_norm": 0.057414278388023376, "learning_rate": 0.01, "loss": 2.0887, "step": 15099 }, { "epoch": 1.5498768472906403, "grad_norm": 0.06718642264604568, "learning_rate": 0.01, "loss": 2.0731, "step": 15102 }, { "epoch": 1.5501847290640394, "grad_norm": 0.07351098954677582, "learning_rate": 0.01, "loss": 2.0589, "step": 15105 }, { "epoch": 1.5504926108374384, "grad_norm": 0.03318174555897713, "learning_rate": 0.01, "loss": 2.0545, "step": 15108 }, { "epoch": 1.5508004926108374, "grad_norm": 0.11198091506958008, "learning_rate": 0.01, "loss": 2.0306, "step": 15111 }, { "epoch": 1.5511083743842364, "grad_norm": 0.056512147188186646, "learning_rate": 0.01, "loss": 2.0724, "step": 15114 }, { "epoch": 1.5514162561576355, "grad_norm": 0.08460327982902527, "learning_rate": 0.01, "loss": 2.0537, "step": 15117 }, { "epoch": 1.5517241379310345, "grad_norm": 0.08536583930253983, "learning_rate": 0.01, "loss": 2.0696, "step": 15120 }, { "epoch": 1.5520320197044335, "grad_norm": 0.10857357084751129, "learning_rate": 0.01, "loss": 2.0645, "step": 15123 }, { "epoch": 1.5523399014778325, "grad_norm": 0.04923904314637184, "learning_rate": 0.01, "loss": 2.0339, "step": 15126 }, { "epoch": 1.5526477832512315, "grad_norm": 0.05313669145107269, "learning_rate": 0.01, "loss": 2.044, "step": 15129 }, { "epoch": 1.5529556650246306, "grad_norm": 0.058348409831523895, "learning_rate": 0.01, "loss": 2.0264, "step": 15132 }, { "epoch": 1.5532635467980296, "grad_norm": 0.04621830955147743, "learning_rate": 0.01, "loss": 2.0543, "step": 15135 }, { "epoch": 1.5535714285714286, "grad_norm": 0.079473577439785, "learning_rate": 0.01, "loss": 2.0494, "step": 15138 }, { "epoch": 1.5538793103448276, "grad_norm": 0.10176654160022736, "learning_rate": 0.01, "loss": 2.0569, "step": 15141 }, { "epoch": 1.5541871921182266, "grad_norm": 0.048997897654771805, "learning_rate": 0.01, "loss": 2.0832, "step": 15144 }, { "epoch": 1.5544950738916257, "grad_norm": 0.06281887739896774, "learning_rate": 0.01, "loss": 2.0446, "step": 15147 }, { "epoch": 1.5548029556650245, "grad_norm": 0.07801464200019836, "learning_rate": 0.01, "loss": 2.0608, "step": 15150 }, { "epoch": 1.5551108374384235, "grad_norm": 0.08535271883010864, "learning_rate": 0.01, "loss": 2.0597, "step": 15153 }, { "epoch": 1.5554187192118225, "grad_norm": 0.06536438316106796, "learning_rate": 0.01, "loss": 2.0491, "step": 15156 }, { "epoch": 1.5557266009852215, "grad_norm": 0.045388113707304, "learning_rate": 0.01, "loss": 2.0243, "step": 15159 }, { "epoch": 1.5560344827586206, "grad_norm": 0.041893068701028824, "learning_rate": 0.01, "loss": 2.0373, "step": 15162 }, { "epoch": 1.5563423645320196, "grad_norm": 0.04261694848537445, "learning_rate": 0.01, "loss": 2.028, "step": 15165 }, { "epoch": 1.5566502463054186, "grad_norm": 0.03123985230922699, "learning_rate": 0.01, "loss": 2.0653, "step": 15168 }, { "epoch": 1.5569581280788176, "grad_norm": 0.048562515527009964, "learning_rate": 0.01, "loss": 2.0249, "step": 15171 }, { "epoch": 1.5572660098522166, "grad_norm": 0.1343316286802292, "learning_rate": 0.01, "loss": 2.062, "step": 15174 }, { "epoch": 1.5575738916256157, "grad_norm": 0.10992839932441711, "learning_rate": 0.01, "loss": 2.0374, "step": 15177 }, { "epoch": 1.5578817733990147, "grad_norm": 0.09098651260137558, "learning_rate": 0.01, "loss": 2.0394, "step": 15180 }, { "epoch": 1.5581896551724137, "grad_norm": 0.05405926704406738, "learning_rate": 0.01, "loss": 2.0491, "step": 15183 }, { "epoch": 1.5584975369458127, "grad_norm": 0.04776093736290932, "learning_rate": 0.01, "loss": 2.0775, "step": 15186 }, { "epoch": 1.5588054187192117, "grad_norm": 0.04614724963903427, "learning_rate": 0.01, "loss": 2.0339, "step": 15189 }, { "epoch": 1.5591133004926108, "grad_norm": 0.05032865330576897, "learning_rate": 0.01, "loss": 2.0516, "step": 15192 }, { "epoch": 1.5594211822660098, "grad_norm": 0.051392171531915665, "learning_rate": 0.01, "loss": 2.0405, "step": 15195 }, { "epoch": 1.5597290640394088, "grad_norm": 0.10255944728851318, "learning_rate": 0.01, "loss": 2.0615, "step": 15198 }, { "epoch": 1.5600369458128078, "grad_norm": 0.04222560301423073, "learning_rate": 0.01, "loss": 2.0527, "step": 15201 }, { "epoch": 1.5603448275862069, "grad_norm": 0.045385826379060745, "learning_rate": 0.01, "loss": 2.0556, "step": 15204 }, { "epoch": 1.5606527093596059, "grad_norm": 0.04241577908396721, "learning_rate": 0.01, "loss": 2.0727, "step": 15207 }, { "epoch": 1.560960591133005, "grad_norm": 0.12387125194072723, "learning_rate": 0.01, "loss": 2.0337, "step": 15210 }, { "epoch": 1.561268472906404, "grad_norm": 0.14704599976539612, "learning_rate": 0.01, "loss": 2.0602, "step": 15213 }, { "epoch": 1.561576354679803, "grad_norm": 0.049915559589862823, "learning_rate": 0.01, "loss": 2.0203, "step": 15216 }, { "epoch": 1.561884236453202, "grad_norm": 0.057630617171525955, "learning_rate": 0.01, "loss": 2.0503, "step": 15219 }, { "epoch": 1.562192118226601, "grad_norm": 0.04765351861715317, "learning_rate": 0.01, "loss": 2.0763, "step": 15222 }, { "epoch": 1.5625, "grad_norm": 0.06213679164648056, "learning_rate": 0.01, "loss": 2.0831, "step": 15225 }, { "epoch": 1.562807881773399, "grad_norm": 0.07990710437297821, "learning_rate": 0.01, "loss": 2.0458, "step": 15228 }, { "epoch": 1.563115763546798, "grad_norm": 0.0683673620223999, "learning_rate": 0.01, "loss": 2.0294, "step": 15231 }, { "epoch": 1.563423645320197, "grad_norm": 0.12503905594348907, "learning_rate": 0.01, "loss": 2.0543, "step": 15234 }, { "epoch": 1.563731527093596, "grad_norm": 0.03973531350493431, "learning_rate": 0.01, "loss": 2.0474, "step": 15237 }, { "epoch": 1.564039408866995, "grad_norm": 0.07055282592773438, "learning_rate": 0.01, "loss": 2.0607, "step": 15240 }, { "epoch": 1.5643472906403941, "grad_norm": 0.06088467687368393, "learning_rate": 0.01, "loss": 2.0643, "step": 15243 }, { "epoch": 1.5646551724137931, "grad_norm": 0.06393450498580933, "learning_rate": 0.01, "loss": 2.0369, "step": 15246 }, { "epoch": 1.5649630541871922, "grad_norm": 0.08600255101919174, "learning_rate": 0.01, "loss": 2.072, "step": 15249 }, { "epoch": 1.5652709359605912, "grad_norm": 0.07075429707765579, "learning_rate": 0.01, "loss": 2.0509, "step": 15252 }, { "epoch": 1.5655788177339902, "grad_norm": 0.058057527989149094, "learning_rate": 0.01, "loss": 2.0373, "step": 15255 }, { "epoch": 1.5658866995073892, "grad_norm": 0.04670482128858566, "learning_rate": 0.01, "loss": 2.0447, "step": 15258 }, { "epoch": 1.5661945812807883, "grad_norm": 0.08971681445837021, "learning_rate": 0.01, "loss": 2.0599, "step": 15261 }, { "epoch": 1.5665024630541873, "grad_norm": 0.12580984830856323, "learning_rate": 0.01, "loss": 2.0402, "step": 15264 }, { "epoch": 1.5668103448275863, "grad_norm": 0.05133863538503647, "learning_rate": 0.01, "loss": 2.056, "step": 15267 }, { "epoch": 1.5671182266009853, "grad_norm": 0.07821512222290039, "learning_rate": 0.01, "loss": 2.0458, "step": 15270 }, { "epoch": 1.5674261083743843, "grad_norm": 0.07024712860584259, "learning_rate": 0.01, "loss": 2.0496, "step": 15273 }, { "epoch": 1.5677339901477834, "grad_norm": 0.09332927316427231, "learning_rate": 0.01, "loss": 2.0358, "step": 15276 }, { "epoch": 1.5680418719211824, "grad_norm": 0.06875135749578476, "learning_rate": 0.01, "loss": 2.0459, "step": 15279 }, { "epoch": 1.5683497536945814, "grad_norm": 0.08868546783924103, "learning_rate": 0.01, "loss": 2.0641, "step": 15282 }, { "epoch": 1.5686576354679804, "grad_norm": 0.07729046791791916, "learning_rate": 0.01, "loss": 2.0333, "step": 15285 }, { "epoch": 1.5689655172413794, "grad_norm": 0.07686775177717209, "learning_rate": 0.01, "loss": 2.0446, "step": 15288 }, { "epoch": 1.5692733990147785, "grad_norm": 0.0839667096734047, "learning_rate": 0.01, "loss": 2.0428, "step": 15291 }, { "epoch": 1.5695812807881775, "grad_norm": 0.0704370066523552, "learning_rate": 0.01, "loss": 2.0638, "step": 15294 }, { "epoch": 1.5698891625615765, "grad_norm": 0.05312497168779373, "learning_rate": 0.01, "loss": 2.029, "step": 15297 }, { "epoch": 1.5701970443349755, "grad_norm": 0.049166906625032425, "learning_rate": 0.01, "loss": 2.0544, "step": 15300 }, { "epoch": 1.5705049261083743, "grad_norm": 0.041398897767066956, "learning_rate": 0.01, "loss": 2.0652, "step": 15303 }, { "epoch": 1.5708128078817734, "grad_norm": 0.08617027848958969, "learning_rate": 0.01, "loss": 2.0675, "step": 15306 }, { "epoch": 1.5711206896551724, "grad_norm": 0.0348927266895771, "learning_rate": 0.01, "loss": 2.043, "step": 15309 }, { "epoch": 1.5714285714285714, "grad_norm": 0.060787077993154526, "learning_rate": 0.01, "loss": 2.0439, "step": 15312 }, { "epoch": 1.5717364532019704, "grad_norm": 0.050898227840662, "learning_rate": 0.01, "loss": 2.0475, "step": 15315 }, { "epoch": 1.5720443349753694, "grad_norm": 0.04594309255480766, "learning_rate": 0.01, "loss": 2.0352, "step": 15318 }, { "epoch": 1.5723522167487685, "grad_norm": 0.1161418929696083, "learning_rate": 0.01, "loss": 2.0232, "step": 15321 }, { "epoch": 1.5726600985221675, "grad_norm": 0.05419136583805084, "learning_rate": 0.01, "loss": 2.0417, "step": 15324 }, { "epoch": 1.5729679802955665, "grad_norm": 0.07661257684230804, "learning_rate": 0.01, "loss": 2.0548, "step": 15327 }, { "epoch": 1.5732758620689655, "grad_norm": 0.09760436415672302, "learning_rate": 0.01, "loss": 2.0539, "step": 15330 }, { "epoch": 1.5735837438423645, "grad_norm": 0.07211121916770935, "learning_rate": 0.01, "loss": 2.037, "step": 15333 }, { "epoch": 1.5738916256157636, "grad_norm": 0.08360971510410309, "learning_rate": 0.01, "loss": 2.0465, "step": 15336 }, { "epoch": 1.5741995073891626, "grad_norm": 0.05901337414979935, "learning_rate": 0.01, "loss": 2.0581, "step": 15339 }, { "epoch": 1.5745073891625616, "grad_norm": 0.07543767988681793, "learning_rate": 0.01, "loss": 2.0437, "step": 15342 }, { "epoch": 1.5748152709359606, "grad_norm": 0.04725690186023712, "learning_rate": 0.01, "loss": 2.0751, "step": 15345 }, { "epoch": 1.5751231527093597, "grad_norm": 0.051067035645246506, "learning_rate": 0.01, "loss": 2.0434, "step": 15348 }, { "epoch": 1.5754310344827587, "grad_norm": 0.03145357966423035, "learning_rate": 0.01, "loss": 2.0511, "step": 15351 }, { "epoch": 1.5757389162561575, "grad_norm": 0.09291981905698776, "learning_rate": 0.01, "loss": 2.0604, "step": 15354 }, { "epoch": 1.5760467980295565, "grad_norm": 0.03409574180841446, "learning_rate": 0.01, "loss": 2.0052, "step": 15357 }, { "epoch": 1.5763546798029555, "grad_norm": 0.045993223786354065, "learning_rate": 0.01, "loss": 2.0442, "step": 15360 }, { "epoch": 1.5766625615763545, "grad_norm": 0.03174202889204025, "learning_rate": 0.01, "loss": 2.0525, "step": 15363 }, { "epoch": 1.5769704433497536, "grad_norm": 0.057013627141714096, "learning_rate": 0.01, "loss": 2.0699, "step": 15366 }, { "epoch": 1.5772783251231526, "grad_norm": 0.05778640881180763, "learning_rate": 0.01, "loss": 2.0641, "step": 15369 }, { "epoch": 1.5775862068965516, "grad_norm": 0.11259103566408157, "learning_rate": 0.01, "loss": 2.0496, "step": 15372 }, { "epoch": 1.5778940886699506, "grad_norm": 0.05728684365749359, "learning_rate": 0.01, "loss": 2.0646, "step": 15375 }, { "epoch": 1.5782019704433496, "grad_norm": 0.07037964463233948, "learning_rate": 0.01, "loss": 2.072, "step": 15378 }, { "epoch": 1.5785098522167487, "grad_norm": 0.05147834122180939, "learning_rate": 0.01, "loss": 2.0339, "step": 15381 }, { "epoch": 1.5788177339901477, "grad_norm": 0.0663742870092392, "learning_rate": 0.01, "loss": 2.0534, "step": 15384 }, { "epoch": 1.5791256157635467, "grad_norm": 0.04665178433060646, "learning_rate": 0.01, "loss": 2.0698, "step": 15387 }, { "epoch": 1.5794334975369457, "grad_norm": 0.037410903722047806, "learning_rate": 0.01, "loss": 2.0529, "step": 15390 }, { "epoch": 1.5797413793103448, "grad_norm": 0.03849703446030617, "learning_rate": 0.01, "loss": 2.0484, "step": 15393 }, { "epoch": 1.5800492610837438, "grad_norm": 0.10119396448135376, "learning_rate": 0.01, "loss": 2.0752, "step": 15396 }, { "epoch": 1.5803571428571428, "grad_norm": 0.03294401615858078, "learning_rate": 0.01, "loss": 2.0218, "step": 15399 }, { "epoch": 1.5806650246305418, "grad_norm": 0.06310781091451645, "learning_rate": 0.01, "loss": 2.0311, "step": 15402 }, { "epoch": 1.5809729064039408, "grad_norm": 0.050191253423690796, "learning_rate": 0.01, "loss": 2.0408, "step": 15405 }, { "epoch": 1.5812807881773399, "grad_norm": 0.09952792525291443, "learning_rate": 0.01, "loss": 2.0622, "step": 15408 }, { "epoch": 1.5815886699507389, "grad_norm": 0.12618017196655273, "learning_rate": 0.01, "loss": 2.0696, "step": 15411 }, { "epoch": 1.581896551724138, "grad_norm": 0.19296571612358093, "learning_rate": 0.01, "loss": 2.072, "step": 15414 }, { "epoch": 1.582204433497537, "grad_norm": 0.1124732494354248, "learning_rate": 0.01, "loss": 2.0451, "step": 15417 }, { "epoch": 1.582512315270936, "grad_norm": 0.06556060910224915, "learning_rate": 0.01, "loss": 2.0325, "step": 15420 }, { "epoch": 1.582820197044335, "grad_norm": 0.05607735365629196, "learning_rate": 0.01, "loss": 2.0663, "step": 15423 }, { "epoch": 1.583128078817734, "grad_norm": 0.04731295630335808, "learning_rate": 0.01, "loss": 2.0559, "step": 15426 }, { "epoch": 1.583435960591133, "grad_norm": 0.060452669858932495, "learning_rate": 0.01, "loss": 2.0498, "step": 15429 }, { "epoch": 1.583743842364532, "grad_norm": 0.056996360421180725, "learning_rate": 0.01, "loss": 2.0752, "step": 15432 }, { "epoch": 1.584051724137931, "grad_norm": 0.11382313817739487, "learning_rate": 0.01, "loss": 2.0723, "step": 15435 }, { "epoch": 1.58435960591133, "grad_norm": 0.05176008865237236, "learning_rate": 0.01, "loss": 2.0706, "step": 15438 }, { "epoch": 1.584667487684729, "grad_norm": 0.05329972878098488, "learning_rate": 0.01, "loss": 2.0473, "step": 15441 }, { "epoch": 1.5849753694581281, "grad_norm": 0.05416284501552582, "learning_rate": 0.01, "loss": 2.0585, "step": 15444 }, { "epoch": 1.5852832512315271, "grad_norm": 0.052267350256443024, "learning_rate": 0.01, "loss": 2.0314, "step": 15447 }, { "epoch": 1.5855911330049262, "grad_norm": 0.1025579646229744, "learning_rate": 0.01, "loss": 2.0412, "step": 15450 }, { "epoch": 1.5858990147783252, "grad_norm": 0.06416940689086914, "learning_rate": 0.01, "loss": 2.0312, "step": 15453 }, { "epoch": 1.5862068965517242, "grad_norm": 0.05699596926569939, "learning_rate": 0.01, "loss": 2.0678, "step": 15456 }, { "epoch": 1.5865147783251232, "grad_norm": 0.036711812019348145, "learning_rate": 0.01, "loss": 2.0251, "step": 15459 }, { "epoch": 1.5868226600985222, "grad_norm": 0.1025582030415535, "learning_rate": 0.01, "loss": 2.0387, "step": 15462 }, { "epoch": 1.5871305418719213, "grad_norm": 0.03923096880316734, "learning_rate": 0.01, "loss": 2.067, "step": 15465 }, { "epoch": 1.5874384236453203, "grad_norm": 0.08267144113779068, "learning_rate": 0.01, "loss": 2.0583, "step": 15468 }, { "epoch": 1.5877463054187193, "grad_norm": 0.15374930202960968, "learning_rate": 0.01, "loss": 2.0384, "step": 15471 }, { "epoch": 1.5880541871921183, "grad_norm": 0.10246127098798752, "learning_rate": 0.01, "loss": 2.0696, "step": 15474 }, { "epoch": 1.5883620689655173, "grad_norm": 0.09784136712551117, "learning_rate": 0.01, "loss": 2.0546, "step": 15477 }, { "epoch": 1.5886699507389164, "grad_norm": 0.08747786283493042, "learning_rate": 0.01, "loss": 2.0754, "step": 15480 }, { "epoch": 1.5889778325123154, "grad_norm": 0.0755406990647316, "learning_rate": 0.01, "loss": 2.0477, "step": 15483 }, { "epoch": 1.5892857142857144, "grad_norm": 0.05593521520495415, "learning_rate": 0.01, "loss": 2.0485, "step": 15486 }, { "epoch": 1.5895935960591134, "grad_norm": 0.04462866857647896, "learning_rate": 0.01, "loss": 2.056, "step": 15489 }, { "epoch": 1.5899014778325125, "grad_norm": 0.040571678429841995, "learning_rate": 0.01, "loss": 2.0906, "step": 15492 }, { "epoch": 1.5902093596059115, "grad_norm": 0.038100458681583405, "learning_rate": 0.01, "loss": 2.0735, "step": 15495 }, { "epoch": 1.5905172413793105, "grad_norm": 0.04336906597018242, "learning_rate": 0.01, "loss": 2.0716, "step": 15498 }, { "epoch": 1.5908251231527095, "grad_norm": 0.1424836665391922, "learning_rate": 0.01, "loss": 2.0424, "step": 15501 }, { "epoch": 1.5911330049261085, "grad_norm": 0.09235331416130066, "learning_rate": 0.01, "loss": 2.0461, "step": 15504 }, { "epoch": 1.5914408866995073, "grad_norm": 0.07932816445827484, "learning_rate": 0.01, "loss": 2.0595, "step": 15507 }, { "epoch": 1.5917487684729064, "grad_norm": 0.057297565042972565, "learning_rate": 0.01, "loss": 2.0407, "step": 15510 }, { "epoch": 1.5920566502463054, "grad_norm": 0.05323542281985283, "learning_rate": 0.01, "loss": 2.0568, "step": 15513 }, { "epoch": 1.5923645320197044, "grad_norm": 0.039465416222810745, "learning_rate": 0.01, "loss": 2.0455, "step": 15516 }, { "epoch": 1.5926724137931034, "grad_norm": 0.044614970684051514, "learning_rate": 0.01, "loss": 2.0784, "step": 15519 }, { "epoch": 1.5929802955665024, "grad_norm": 0.044074248522520065, "learning_rate": 0.01, "loss": 2.0391, "step": 15522 }, { "epoch": 1.5932881773399015, "grad_norm": 0.04098647087812424, "learning_rate": 0.01, "loss": 2.0512, "step": 15525 }, { "epoch": 1.5935960591133005, "grad_norm": 0.18400810658931732, "learning_rate": 0.01, "loss": 2.0623, "step": 15528 }, { "epoch": 1.5939039408866995, "grad_norm": 0.10264160484075546, "learning_rate": 0.01, "loss": 2.0546, "step": 15531 }, { "epoch": 1.5942118226600985, "grad_norm": 0.10086511820554733, "learning_rate": 0.01, "loss": 2.0671, "step": 15534 }, { "epoch": 1.5945197044334976, "grad_norm": 0.03823179379105568, "learning_rate": 0.01, "loss": 2.0496, "step": 15537 }, { "epoch": 1.5948275862068966, "grad_norm": 0.0449577271938324, "learning_rate": 0.01, "loss": 2.0635, "step": 15540 }, { "epoch": 1.5951354679802956, "grad_norm": 0.04791559278964996, "learning_rate": 0.01, "loss": 2.0596, "step": 15543 }, { "epoch": 1.5954433497536946, "grad_norm": 0.04523475095629692, "learning_rate": 0.01, "loss": 2.0457, "step": 15546 }, { "epoch": 1.5957512315270936, "grad_norm": 0.10654012113809586, "learning_rate": 0.01, "loss": 2.0172, "step": 15549 }, { "epoch": 1.5960591133004927, "grad_norm": 0.06602972745895386, "learning_rate": 0.01, "loss": 2.0565, "step": 15552 }, { "epoch": 1.5963669950738915, "grad_norm": 0.10605626553297043, "learning_rate": 0.01, "loss": 2.0792, "step": 15555 }, { "epoch": 1.5966748768472905, "grad_norm": 0.05995124578475952, "learning_rate": 0.01, "loss": 2.0367, "step": 15558 }, { "epoch": 1.5969827586206895, "grad_norm": 0.05426995828747749, "learning_rate": 0.01, "loss": 2.0458, "step": 15561 }, { "epoch": 1.5972906403940885, "grad_norm": 0.08749561756849289, "learning_rate": 0.01, "loss": 2.0509, "step": 15564 }, { "epoch": 1.5975985221674875, "grad_norm": 0.0735105574131012, "learning_rate": 0.01, "loss": 2.0548, "step": 15567 }, { "epoch": 1.5979064039408866, "grad_norm": 0.05417585372924805, "learning_rate": 0.01, "loss": 2.0304, "step": 15570 }, { "epoch": 1.5982142857142856, "grad_norm": 0.04170646145939827, "learning_rate": 0.01, "loss": 2.0751, "step": 15573 }, { "epoch": 1.5985221674876846, "grad_norm": 0.05886775627732277, "learning_rate": 0.01, "loss": 2.0419, "step": 15576 }, { "epoch": 1.5988300492610836, "grad_norm": 0.04231201857328415, "learning_rate": 0.01, "loss": 2.0531, "step": 15579 }, { "epoch": 1.5991379310344827, "grad_norm": 0.06620585918426514, "learning_rate": 0.01, "loss": 2.043, "step": 15582 }, { "epoch": 1.5994458128078817, "grad_norm": 0.10536913573741913, "learning_rate": 0.01, "loss": 2.0327, "step": 15585 }, { "epoch": 1.5997536945812807, "grad_norm": 0.14467884600162506, "learning_rate": 0.01, "loss": 2.0491, "step": 15588 }, { "epoch": 1.6000615763546797, "grad_norm": 0.11715273559093475, "learning_rate": 0.01, "loss": 2.0362, "step": 15591 }, { "epoch": 1.6003694581280787, "grad_norm": 0.04978121817111969, "learning_rate": 0.01, "loss": 2.0444, "step": 15594 }, { "epoch": 1.6006773399014778, "grad_norm": 0.06248803436756134, "learning_rate": 0.01, "loss": 2.0527, "step": 15597 }, { "epoch": 1.6009852216748768, "grad_norm": 0.05408048257231712, "learning_rate": 0.01, "loss": 2.0519, "step": 15600 }, { "epoch": 1.6012931034482758, "grad_norm": 0.05805948004126549, "learning_rate": 0.01, "loss": 2.0539, "step": 15603 }, { "epoch": 1.6016009852216748, "grad_norm": 0.03809194639325142, "learning_rate": 0.01, "loss": 2.0515, "step": 15606 }, { "epoch": 1.6019088669950738, "grad_norm": 0.07981141656637192, "learning_rate": 0.01, "loss": 2.0238, "step": 15609 }, { "epoch": 1.6022167487684729, "grad_norm": 0.04769575223326683, "learning_rate": 0.01, "loss": 2.0703, "step": 15612 }, { "epoch": 1.6025246305418719, "grad_norm": 0.09913644194602966, "learning_rate": 0.01, "loss": 2.074, "step": 15615 }, { "epoch": 1.602832512315271, "grad_norm": 0.12298569083213806, "learning_rate": 0.01, "loss": 2.0662, "step": 15618 }, { "epoch": 1.60314039408867, "grad_norm": 0.0525309219956398, "learning_rate": 0.01, "loss": 2.0411, "step": 15621 }, { "epoch": 1.603448275862069, "grad_norm": 0.07430320978164673, "learning_rate": 0.01, "loss": 2.0465, "step": 15624 }, { "epoch": 1.603756157635468, "grad_norm": 0.036753058433532715, "learning_rate": 0.01, "loss": 2.0408, "step": 15627 }, { "epoch": 1.604064039408867, "grad_norm": 0.04560523107647896, "learning_rate": 0.01, "loss": 2.074, "step": 15630 }, { "epoch": 1.604371921182266, "grad_norm": 0.07089810073375702, "learning_rate": 0.01, "loss": 2.0599, "step": 15633 }, { "epoch": 1.604679802955665, "grad_norm": 0.10833004862070084, "learning_rate": 0.01, "loss": 2.066, "step": 15636 }, { "epoch": 1.604987684729064, "grad_norm": 0.06033416837453842, "learning_rate": 0.01, "loss": 2.0893, "step": 15639 }, { "epoch": 1.605295566502463, "grad_norm": 0.06819162517786026, "learning_rate": 0.01, "loss": 2.0432, "step": 15642 }, { "epoch": 1.605603448275862, "grad_norm": 0.08949002623558044, "learning_rate": 0.01, "loss": 2.0891, "step": 15645 }, { "epoch": 1.6059113300492611, "grad_norm": 0.04749004542827606, "learning_rate": 0.01, "loss": 2.0434, "step": 15648 }, { "epoch": 1.6062192118226601, "grad_norm": 0.06903103739023209, "learning_rate": 0.01, "loss": 2.0379, "step": 15651 }, { "epoch": 1.6065270935960592, "grad_norm": 0.10074819624423981, "learning_rate": 0.01, "loss": 2.0657, "step": 15654 }, { "epoch": 1.6068349753694582, "grad_norm": 0.0390753298997879, "learning_rate": 0.01, "loss": 2.0186, "step": 15657 }, { "epoch": 1.6071428571428572, "grad_norm": 0.04776669666171074, "learning_rate": 0.01, "loss": 2.0435, "step": 15660 }, { "epoch": 1.6074507389162562, "grad_norm": 0.1191340908408165, "learning_rate": 0.01, "loss": 2.0519, "step": 15663 }, { "epoch": 1.6077586206896552, "grad_norm": 0.08326657861471176, "learning_rate": 0.01, "loss": 2.0443, "step": 15666 }, { "epoch": 1.6080665024630543, "grad_norm": 0.044734589755535126, "learning_rate": 0.01, "loss": 2.0626, "step": 15669 }, { "epoch": 1.6083743842364533, "grad_norm": 0.047262392938137054, "learning_rate": 0.01, "loss": 2.0351, "step": 15672 }, { "epoch": 1.6086822660098523, "grad_norm": 0.0908563882112503, "learning_rate": 0.01, "loss": 2.0655, "step": 15675 }, { "epoch": 1.6089901477832513, "grad_norm": 0.06681264191865921, "learning_rate": 0.01, "loss": 2.0606, "step": 15678 }, { "epoch": 1.6092980295566504, "grad_norm": 0.09569018334150314, "learning_rate": 0.01, "loss": 2.0454, "step": 15681 }, { "epoch": 1.6096059113300494, "grad_norm": 0.04303963482379913, "learning_rate": 0.01, "loss": 2.0296, "step": 15684 }, { "epoch": 1.6099137931034484, "grad_norm": 0.09924867749214172, "learning_rate": 0.01, "loss": 2.0578, "step": 15687 }, { "epoch": 1.6102216748768474, "grad_norm": 0.041328392922878265, "learning_rate": 0.01, "loss": 2.0538, "step": 15690 }, { "epoch": 1.6105295566502464, "grad_norm": 0.056367840617895126, "learning_rate": 0.01, "loss": 2.0746, "step": 15693 }, { "epoch": 1.6108374384236455, "grad_norm": 0.06074264645576477, "learning_rate": 0.01, "loss": 2.0707, "step": 15696 }, { "epoch": 1.6111453201970445, "grad_norm": 0.06541740894317627, "learning_rate": 0.01, "loss": 2.0615, "step": 15699 }, { "epoch": 1.6114532019704435, "grad_norm": 0.06279835850000381, "learning_rate": 0.01, "loss": 2.0484, "step": 15702 }, { "epoch": 1.6117610837438425, "grad_norm": 0.03825109452009201, "learning_rate": 0.01, "loss": 2.0317, "step": 15705 }, { "epoch": 1.6120689655172413, "grad_norm": 0.03792817145586014, "learning_rate": 0.01, "loss": 2.0246, "step": 15708 }, { "epoch": 1.6123768472906403, "grad_norm": 0.05229473114013672, "learning_rate": 0.01, "loss": 2.0765, "step": 15711 }, { "epoch": 1.6126847290640394, "grad_norm": 0.11285384744405746, "learning_rate": 0.01, "loss": 2.0392, "step": 15714 }, { "epoch": 1.6129926108374384, "grad_norm": 0.07333546876907349, "learning_rate": 0.01, "loss": 2.05, "step": 15717 }, { "epoch": 1.6133004926108374, "grad_norm": 0.07698936760425568, "learning_rate": 0.01, "loss": 2.0757, "step": 15720 }, { "epoch": 1.6136083743842364, "grad_norm": 0.06517963856458664, "learning_rate": 0.01, "loss": 2.032, "step": 15723 }, { "epoch": 1.6139162561576355, "grad_norm": 0.07242800295352936, "learning_rate": 0.01, "loss": 2.0398, "step": 15726 }, { "epoch": 1.6142241379310345, "grad_norm": 0.03956649452447891, "learning_rate": 0.01, "loss": 2.0437, "step": 15729 }, { "epoch": 1.6145320197044335, "grad_norm": 0.10249898582696915, "learning_rate": 0.01, "loss": 2.0716, "step": 15732 }, { "epoch": 1.6148399014778325, "grad_norm": 0.09716839343309402, "learning_rate": 0.01, "loss": 2.0288, "step": 15735 }, { "epoch": 1.6151477832512315, "grad_norm": 0.0809134840965271, "learning_rate": 0.01, "loss": 2.0553, "step": 15738 }, { "epoch": 1.6154556650246306, "grad_norm": 0.07891330122947693, "learning_rate": 0.01, "loss": 2.0622, "step": 15741 }, { "epoch": 1.6157635467980296, "grad_norm": 0.06289231032133102, "learning_rate": 0.01, "loss": 2.0382, "step": 15744 }, { "epoch": 1.6160714285714286, "grad_norm": 0.032251689583063126, "learning_rate": 0.01, "loss": 2.049, "step": 15747 }, { "epoch": 1.6163793103448276, "grad_norm": 0.032203931361436844, "learning_rate": 0.01, "loss": 2.0474, "step": 15750 }, { "epoch": 1.6166871921182266, "grad_norm": 0.042572617530822754, "learning_rate": 0.01, "loss": 2.0604, "step": 15753 }, { "epoch": 1.6169950738916257, "grad_norm": 0.06869769096374512, "learning_rate": 0.01, "loss": 2.0643, "step": 15756 }, { "epoch": 1.6173029556650245, "grad_norm": 0.09649953991174698, "learning_rate": 0.01, "loss": 2.0811, "step": 15759 }, { "epoch": 1.6176108374384235, "grad_norm": 0.060255225747823715, "learning_rate": 0.01, "loss": 2.0543, "step": 15762 }, { "epoch": 1.6179187192118225, "grad_norm": 0.0548517182469368, "learning_rate": 0.01, "loss": 2.0427, "step": 15765 }, { "epoch": 1.6182266009852215, "grad_norm": 0.09392546862363815, "learning_rate": 0.01, "loss": 2.0722, "step": 15768 }, { "epoch": 1.6185344827586206, "grad_norm": 0.052100926637649536, "learning_rate": 0.01, "loss": 2.0469, "step": 15771 }, { "epoch": 1.6188423645320196, "grad_norm": 0.05099212005734444, "learning_rate": 0.01, "loss": 2.0589, "step": 15774 }, { "epoch": 1.6191502463054186, "grad_norm": 0.0486266165971756, "learning_rate": 0.01, "loss": 2.0308, "step": 15777 }, { "epoch": 1.6194581280788176, "grad_norm": 0.044072605669498444, "learning_rate": 0.01, "loss": 2.0877, "step": 15780 }, { "epoch": 1.6197660098522166, "grad_norm": 0.09196856617927551, "learning_rate": 0.01, "loss": 2.0224, "step": 15783 }, { "epoch": 1.6200738916256157, "grad_norm": 0.05948984995484352, "learning_rate": 0.01, "loss": 2.0425, "step": 15786 }, { "epoch": 1.6203817733990147, "grad_norm": 0.043075162917375565, "learning_rate": 0.01, "loss": 2.058, "step": 15789 }, { "epoch": 1.6206896551724137, "grad_norm": 0.06739038228988647, "learning_rate": 0.01, "loss": 2.0356, "step": 15792 }, { "epoch": 1.6209975369458127, "grad_norm": 0.05961238220334053, "learning_rate": 0.01, "loss": 2.0492, "step": 15795 }, { "epoch": 1.6213054187192117, "grad_norm": 0.06527238339185715, "learning_rate": 0.01, "loss": 2.0227, "step": 15798 }, { "epoch": 1.6216133004926108, "grad_norm": 0.09234929084777832, "learning_rate": 0.01, "loss": 2.039, "step": 15801 }, { "epoch": 1.6219211822660098, "grad_norm": 0.08050446212291718, "learning_rate": 0.01, "loss": 2.0769, "step": 15804 }, { "epoch": 1.6222290640394088, "grad_norm": 0.06419754028320312, "learning_rate": 0.01, "loss": 2.0613, "step": 15807 }, { "epoch": 1.6225369458128078, "grad_norm": 0.06302323937416077, "learning_rate": 0.01, "loss": 2.0535, "step": 15810 }, { "epoch": 1.6228448275862069, "grad_norm": 0.051602717489004135, "learning_rate": 0.01, "loss": 2.0462, "step": 15813 }, { "epoch": 1.6231527093596059, "grad_norm": 0.12424405664205551, "learning_rate": 0.01, "loss": 2.0562, "step": 15816 }, { "epoch": 1.623460591133005, "grad_norm": 0.10444232821464539, "learning_rate": 0.01, "loss": 2.0527, "step": 15819 }, { "epoch": 1.623768472906404, "grad_norm": 0.06170908361673355, "learning_rate": 0.01, "loss": 2.0456, "step": 15822 }, { "epoch": 1.624076354679803, "grad_norm": 0.05145244672894478, "learning_rate": 0.01, "loss": 2.0368, "step": 15825 }, { "epoch": 1.624384236453202, "grad_norm": 0.0459282286465168, "learning_rate": 0.01, "loss": 2.0547, "step": 15828 }, { "epoch": 1.624692118226601, "grad_norm": 0.05250949412584305, "learning_rate": 0.01, "loss": 2.0475, "step": 15831 }, { "epoch": 1.625, "grad_norm": 0.03222022205591202, "learning_rate": 0.01, "loss": 2.0347, "step": 15834 }, { "epoch": 1.625307881773399, "grad_norm": 0.05849120765924454, "learning_rate": 0.01, "loss": 2.0351, "step": 15837 }, { "epoch": 1.625615763546798, "grad_norm": 0.04638088122010231, "learning_rate": 0.01, "loss": 2.0222, "step": 15840 }, { "epoch": 1.625923645320197, "grad_norm": 0.046597037464380264, "learning_rate": 0.01, "loss": 2.0577, "step": 15843 }, { "epoch": 1.626231527093596, "grad_norm": 0.10477445274591446, "learning_rate": 0.01, "loss": 2.0528, "step": 15846 }, { "epoch": 1.626539408866995, "grad_norm": 0.03439783677458763, "learning_rate": 0.01, "loss": 2.0631, "step": 15849 }, { "epoch": 1.6268472906403941, "grad_norm": 0.05810544639825821, "learning_rate": 0.01, "loss": 2.0531, "step": 15852 }, { "epoch": 1.6271551724137931, "grad_norm": 0.04557522386312485, "learning_rate": 0.01, "loss": 2.0582, "step": 15855 }, { "epoch": 1.6274630541871922, "grad_norm": 0.04236530885100365, "learning_rate": 0.01, "loss": 2.0531, "step": 15858 }, { "epoch": 1.6277709359605912, "grad_norm": 0.04338948428630829, "learning_rate": 0.01, "loss": 2.0584, "step": 15861 }, { "epoch": 1.6280788177339902, "grad_norm": 0.039782583713531494, "learning_rate": 0.01, "loss": 2.0699, "step": 15864 }, { "epoch": 1.6283866995073892, "grad_norm": 0.06858891248703003, "learning_rate": 0.01, "loss": 2.0477, "step": 15867 }, { "epoch": 1.6286945812807883, "grad_norm": 0.0510399155318737, "learning_rate": 0.01, "loss": 2.0692, "step": 15870 }, { "epoch": 1.6290024630541873, "grad_norm": 0.12568604946136475, "learning_rate": 0.01, "loss": 2.0597, "step": 15873 }, { "epoch": 1.6293103448275863, "grad_norm": 0.09245727956295013, "learning_rate": 0.01, "loss": 2.0365, "step": 15876 }, { "epoch": 1.6296182266009853, "grad_norm": 0.05763734132051468, "learning_rate": 0.01, "loss": 2.0787, "step": 15879 }, { "epoch": 1.6299261083743843, "grad_norm": 0.06099852919578552, "learning_rate": 0.01, "loss": 2.0603, "step": 15882 }, { "epoch": 1.6302339901477834, "grad_norm": 0.05738021805882454, "learning_rate": 0.01, "loss": 2.0405, "step": 15885 }, { "epoch": 1.6305418719211824, "grad_norm": 0.04953853040933609, "learning_rate": 0.01, "loss": 2.0622, "step": 15888 }, { "epoch": 1.6308497536945814, "grad_norm": 0.08572196215391159, "learning_rate": 0.01, "loss": 2.0618, "step": 15891 }, { "epoch": 1.6311576354679804, "grad_norm": 0.09245479106903076, "learning_rate": 0.01, "loss": 2.0453, "step": 15894 }, { "epoch": 1.6314655172413794, "grad_norm": 0.057964712381362915, "learning_rate": 0.01, "loss": 2.0186, "step": 15897 }, { "epoch": 1.6317733990147785, "grad_norm": 0.05189305916428566, "learning_rate": 0.01, "loss": 2.0615, "step": 15900 }, { "epoch": 1.6320812807881775, "grad_norm": 0.07327884435653687, "learning_rate": 0.01, "loss": 2.0175, "step": 15903 }, { "epoch": 1.6323891625615765, "grad_norm": 0.07089177519083023, "learning_rate": 0.01, "loss": 2.0475, "step": 15906 }, { "epoch": 1.6326970443349755, "grad_norm": 0.09783073514699936, "learning_rate": 0.01, "loss": 2.051, "step": 15909 }, { "epoch": 1.6330049261083743, "grad_norm": 0.06617991626262665, "learning_rate": 0.01, "loss": 2.0408, "step": 15912 }, { "epoch": 1.6333128078817734, "grad_norm": 0.10033921152353287, "learning_rate": 0.01, "loss": 2.0308, "step": 15915 }, { "epoch": 1.6336206896551724, "grad_norm": 0.054432835429906845, "learning_rate": 0.01, "loss": 2.0404, "step": 15918 }, { "epoch": 1.6339285714285714, "grad_norm": 0.056940387934446335, "learning_rate": 0.01, "loss": 2.0566, "step": 15921 }, { "epoch": 1.6342364532019704, "grad_norm": 0.12047278136014938, "learning_rate": 0.01, "loss": 2.0464, "step": 15924 }, { "epoch": 1.6345443349753694, "grad_norm": 0.04637087881565094, "learning_rate": 0.01, "loss": 2.0514, "step": 15927 }, { "epoch": 1.6348522167487685, "grad_norm": 0.03925006836652756, "learning_rate": 0.01, "loss": 2.0536, "step": 15930 }, { "epoch": 1.6351600985221675, "grad_norm": 0.04180562496185303, "learning_rate": 0.01, "loss": 2.0206, "step": 15933 }, { "epoch": 1.6354679802955665, "grad_norm": 0.08031155914068222, "learning_rate": 0.01, "loss": 2.0509, "step": 15936 }, { "epoch": 1.6357758620689655, "grad_norm": 0.0812869518995285, "learning_rate": 0.01, "loss": 2.021, "step": 15939 }, { "epoch": 1.6360837438423645, "grad_norm": 0.07887094467878342, "learning_rate": 0.01, "loss": 2.0554, "step": 15942 }, { "epoch": 1.6363916256157636, "grad_norm": 0.12604457139968872, "learning_rate": 0.01, "loss": 2.0236, "step": 15945 }, { "epoch": 1.6366995073891626, "grad_norm": 0.1262006163597107, "learning_rate": 0.01, "loss": 2.0806, "step": 15948 }, { "epoch": 1.6370073891625616, "grad_norm": 0.07335629314184189, "learning_rate": 0.01, "loss": 2.0339, "step": 15951 }, { "epoch": 1.6373152709359606, "grad_norm": 0.043172985315322876, "learning_rate": 0.01, "loss": 2.0455, "step": 15954 }, { "epoch": 1.6376231527093597, "grad_norm": 0.07475942373275757, "learning_rate": 0.01, "loss": 2.0842, "step": 15957 }, { "epoch": 1.6379310344827587, "grad_norm": 0.06113087013363838, "learning_rate": 0.01, "loss": 2.0526, "step": 15960 }, { "epoch": 1.6382389162561575, "grad_norm": 0.08709672093391418, "learning_rate": 0.01, "loss": 2.0886, "step": 15963 }, { "epoch": 1.6385467980295565, "grad_norm": 0.05810529738664627, "learning_rate": 0.01, "loss": 2.043, "step": 15966 }, { "epoch": 1.6388546798029555, "grad_norm": 0.0831620916724205, "learning_rate": 0.01, "loss": 2.0599, "step": 15969 }, { "epoch": 1.6391625615763545, "grad_norm": 0.040577951818704605, "learning_rate": 0.01, "loss": 2.0676, "step": 15972 }, { "epoch": 1.6394704433497536, "grad_norm": 0.03428385406732559, "learning_rate": 0.01, "loss": 2.0491, "step": 15975 }, { "epoch": 1.6397783251231526, "grad_norm": 0.04800771176815033, "learning_rate": 0.01, "loss": 2.0371, "step": 15978 }, { "epoch": 1.6400862068965516, "grad_norm": 0.05769934877753258, "learning_rate": 0.01, "loss": 2.0468, "step": 15981 }, { "epoch": 1.6403940886699506, "grad_norm": 0.08842117339372635, "learning_rate": 0.01, "loss": 2.037, "step": 15984 }, { "epoch": 1.6407019704433496, "grad_norm": 0.09740414470434189, "learning_rate": 0.01, "loss": 2.0423, "step": 15987 }, { "epoch": 1.6410098522167487, "grad_norm": 0.11128890514373779, "learning_rate": 0.01, "loss": 2.0423, "step": 15990 }, { "epoch": 1.6413177339901477, "grad_norm": 0.03690354898571968, "learning_rate": 0.01, "loss": 2.0412, "step": 15993 }, { "epoch": 1.6416256157635467, "grad_norm": 0.07311075925827026, "learning_rate": 0.01, "loss": 2.036, "step": 15996 }, { "epoch": 1.6419334975369457, "grad_norm": 0.045825451612472534, "learning_rate": 0.01, "loss": 2.0491, "step": 15999 }, { "epoch": 1.6422413793103448, "grad_norm": 0.09123300760984421, "learning_rate": 0.01, "loss": 2.0574, "step": 16002 }, { "epoch": 1.6425492610837438, "grad_norm": 0.0702185183763504, "learning_rate": 0.01, "loss": 2.0715, "step": 16005 }, { "epoch": 1.6428571428571428, "grad_norm": 0.0355604812502861, "learning_rate": 0.01, "loss": 2.0461, "step": 16008 }, { "epoch": 1.6431650246305418, "grad_norm": 0.03151632100343704, "learning_rate": 0.01, "loss": 2.0205, "step": 16011 }, { "epoch": 1.6434729064039408, "grad_norm": 0.04302441328763962, "learning_rate": 0.01, "loss": 2.0824, "step": 16014 }, { "epoch": 1.6437807881773399, "grad_norm": 0.06012306734919548, "learning_rate": 0.01, "loss": 2.0494, "step": 16017 }, { "epoch": 1.6440886699507389, "grad_norm": 0.04698712378740311, "learning_rate": 0.01, "loss": 2.0364, "step": 16020 }, { "epoch": 1.644396551724138, "grad_norm": 0.03930363059043884, "learning_rate": 0.01, "loss": 2.0612, "step": 16023 }, { "epoch": 1.644704433497537, "grad_norm": 0.0881473496556282, "learning_rate": 0.01, "loss": 2.0724, "step": 16026 }, { "epoch": 1.645012315270936, "grad_norm": 0.04207085818052292, "learning_rate": 0.01, "loss": 2.0524, "step": 16029 }, { "epoch": 1.645320197044335, "grad_norm": 0.04729215428233147, "learning_rate": 0.01, "loss": 2.0538, "step": 16032 }, { "epoch": 1.645628078817734, "grad_norm": 0.050990305840969086, "learning_rate": 0.01, "loss": 2.0473, "step": 16035 }, { "epoch": 1.645935960591133, "grad_norm": 0.05049813538789749, "learning_rate": 0.01, "loss": 2.0609, "step": 16038 }, { "epoch": 1.646243842364532, "grad_norm": 0.07787630707025528, "learning_rate": 0.01, "loss": 2.0505, "step": 16041 }, { "epoch": 1.646551724137931, "grad_norm": 0.06656081229448318, "learning_rate": 0.01, "loss": 2.0365, "step": 16044 }, { "epoch": 1.64685960591133, "grad_norm": 0.08293109387159348, "learning_rate": 0.01, "loss": 2.062, "step": 16047 }, { "epoch": 1.647167487684729, "grad_norm": 0.0775810182094574, "learning_rate": 0.01, "loss": 2.0466, "step": 16050 }, { "epoch": 1.6474753694581281, "grad_norm": 0.07917825132608414, "learning_rate": 0.01, "loss": 2.067, "step": 16053 }, { "epoch": 1.6477832512315271, "grad_norm": 0.07658717036247253, "learning_rate": 0.01, "loss": 2.0323, "step": 16056 }, { "epoch": 1.6480911330049262, "grad_norm": 0.07735300809144974, "learning_rate": 0.01, "loss": 2.0481, "step": 16059 }, { "epoch": 1.6483990147783252, "grad_norm": 0.07964644581079483, "learning_rate": 0.01, "loss": 2.0469, "step": 16062 }, { "epoch": 1.6487068965517242, "grad_norm": 0.0601799339056015, "learning_rate": 0.01, "loss": 2.0453, "step": 16065 }, { "epoch": 1.6490147783251232, "grad_norm": 0.1039920225739479, "learning_rate": 0.01, "loss": 2.0474, "step": 16068 }, { "epoch": 1.6493226600985222, "grad_norm": 0.055755455046892166, "learning_rate": 0.01, "loss": 2.0615, "step": 16071 }, { "epoch": 1.6496305418719213, "grad_norm": 0.0998646542429924, "learning_rate": 0.01, "loss": 2.0675, "step": 16074 }, { "epoch": 1.6499384236453203, "grad_norm": 0.04582648724317551, "learning_rate": 0.01, "loss": 2.0277, "step": 16077 }, { "epoch": 1.6502463054187193, "grad_norm": 0.08638078719377518, "learning_rate": 0.01, "loss": 2.0473, "step": 16080 }, { "epoch": 1.6505541871921183, "grad_norm": 0.053813617676496506, "learning_rate": 0.01, "loss": 2.0488, "step": 16083 }, { "epoch": 1.6508620689655173, "grad_norm": 0.08186789602041245, "learning_rate": 0.01, "loss": 2.07, "step": 16086 }, { "epoch": 1.6511699507389164, "grad_norm": 0.037794895470142365, "learning_rate": 0.01, "loss": 2.0554, "step": 16089 }, { "epoch": 1.6514778325123154, "grad_norm": 0.1052238717675209, "learning_rate": 0.01, "loss": 2.0614, "step": 16092 }, { "epoch": 1.6517857142857144, "grad_norm": 0.07596205919981003, "learning_rate": 0.01, "loss": 2.0358, "step": 16095 }, { "epoch": 1.6520935960591134, "grad_norm": 0.047295670956373215, "learning_rate": 0.01, "loss": 2.0488, "step": 16098 }, { "epoch": 1.6524014778325125, "grad_norm": 0.05572659894824028, "learning_rate": 0.01, "loss": 2.0468, "step": 16101 }, { "epoch": 1.6527093596059115, "grad_norm": 0.0429069958627224, "learning_rate": 0.01, "loss": 2.0681, "step": 16104 }, { "epoch": 1.6530172413793105, "grad_norm": 0.055060967803001404, "learning_rate": 0.01, "loss": 2.0347, "step": 16107 }, { "epoch": 1.6533251231527095, "grad_norm": 0.05243745073676109, "learning_rate": 0.01, "loss": 2.0696, "step": 16110 }, { "epoch": 1.6536330049261085, "grad_norm": 0.052228983491659164, "learning_rate": 0.01, "loss": 2.06, "step": 16113 }, { "epoch": 1.6539408866995073, "grad_norm": 0.065925233066082, "learning_rate": 0.01, "loss": 2.0707, "step": 16116 }, { "epoch": 1.6542487684729064, "grad_norm": 0.05819106101989746, "learning_rate": 0.01, "loss": 2.0137, "step": 16119 }, { "epoch": 1.6545566502463054, "grad_norm": 0.04320794716477394, "learning_rate": 0.01, "loss": 2.0691, "step": 16122 }, { "epoch": 1.6548645320197044, "grad_norm": 0.04202846437692642, "learning_rate": 0.01, "loss": 2.0456, "step": 16125 }, { "epoch": 1.6551724137931034, "grad_norm": 0.12747296690940857, "learning_rate": 0.01, "loss": 2.0419, "step": 16128 }, { "epoch": 1.6554802955665024, "grad_norm": 0.07199030369520187, "learning_rate": 0.01, "loss": 2.0347, "step": 16131 }, { "epoch": 1.6557881773399015, "grad_norm": 0.085335373878479, "learning_rate": 0.01, "loss": 2.0383, "step": 16134 }, { "epoch": 1.6560960591133005, "grad_norm": 0.061818841844797134, "learning_rate": 0.01, "loss": 2.0631, "step": 16137 }, { "epoch": 1.6564039408866995, "grad_norm": 0.06255804747343063, "learning_rate": 0.01, "loss": 2.0492, "step": 16140 }, { "epoch": 1.6567118226600985, "grad_norm": 0.08308485150337219, "learning_rate": 0.01, "loss": 2.0814, "step": 16143 }, { "epoch": 1.6570197044334976, "grad_norm": 0.06358073651790619, "learning_rate": 0.01, "loss": 2.048, "step": 16146 }, { "epoch": 1.6573275862068966, "grad_norm": 0.085427425801754, "learning_rate": 0.01, "loss": 2.0433, "step": 16149 }, { "epoch": 1.6576354679802956, "grad_norm": 0.043243568390607834, "learning_rate": 0.01, "loss": 2.0432, "step": 16152 }, { "epoch": 1.6579433497536946, "grad_norm": 0.06593325734138489, "learning_rate": 0.01, "loss": 2.0469, "step": 16155 }, { "epoch": 1.6582512315270936, "grad_norm": 0.14644569158554077, "learning_rate": 0.01, "loss": 2.0689, "step": 16158 }, { "epoch": 1.6585591133004927, "grad_norm": 0.1211152896285057, "learning_rate": 0.01, "loss": 2.0505, "step": 16161 }, { "epoch": 1.6588669950738915, "grad_norm": 0.11020830273628235, "learning_rate": 0.01, "loss": 2.0572, "step": 16164 }, { "epoch": 1.6591748768472905, "grad_norm": 0.08850467950105667, "learning_rate": 0.01, "loss": 2.0424, "step": 16167 }, { "epoch": 1.6594827586206895, "grad_norm": 0.050562698394060135, "learning_rate": 0.01, "loss": 2.0542, "step": 16170 }, { "epoch": 1.6597906403940885, "grad_norm": 0.048076871782541275, "learning_rate": 0.01, "loss": 2.0433, "step": 16173 }, { "epoch": 1.6600985221674875, "grad_norm": 0.03727034851908684, "learning_rate": 0.01, "loss": 2.0333, "step": 16176 }, { "epoch": 1.6604064039408866, "grad_norm": 0.048614371567964554, "learning_rate": 0.01, "loss": 2.0552, "step": 16179 }, { "epoch": 1.6607142857142856, "grad_norm": 0.05649641901254654, "learning_rate": 0.01, "loss": 2.0536, "step": 16182 }, { "epoch": 1.6610221674876846, "grad_norm": 0.05329003930091858, "learning_rate": 0.01, "loss": 2.0386, "step": 16185 }, { "epoch": 1.6613300492610836, "grad_norm": 0.06444583833217621, "learning_rate": 0.01, "loss": 2.055, "step": 16188 }, { "epoch": 1.6616379310344827, "grad_norm": 0.045777902007102966, "learning_rate": 0.01, "loss": 2.0476, "step": 16191 }, { "epoch": 1.6619458128078817, "grad_norm": 0.04831868037581444, "learning_rate": 0.01, "loss": 2.0582, "step": 16194 }, { "epoch": 1.6622536945812807, "grad_norm": 0.10648196935653687, "learning_rate": 0.01, "loss": 2.0579, "step": 16197 }, { "epoch": 1.6625615763546797, "grad_norm": 0.08369257301092148, "learning_rate": 0.01, "loss": 2.0505, "step": 16200 }, { "epoch": 1.6628694581280787, "grad_norm": 0.13716475665569305, "learning_rate": 0.01, "loss": 2.0383, "step": 16203 }, { "epoch": 1.6631773399014778, "grad_norm": 0.05025027319788933, "learning_rate": 0.01, "loss": 2.0549, "step": 16206 }, { "epoch": 1.6634852216748768, "grad_norm": 0.03850054368376732, "learning_rate": 0.01, "loss": 2.0412, "step": 16209 }, { "epoch": 1.6637931034482758, "grad_norm": 0.046656832098960876, "learning_rate": 0.01, "loss": 2.0595, "step": 16212 }, { "epoch": 1.6641009852216748, "grad_norm": 0.03826647624373436, "learning_rate": 0.01, "loss": 2.0352, "step": 16215 }, { "epoch": 1.6644088669950738, "grad_norm": 0.061087023466825485, "learning_rate": 0.01, "loss": 2.0357, "step": 16218 }, { "epoch": 1.6647167487684729, "grad_norm": 0.03787006065249443, "learning_rate": 0.01, "loss": 2.0226, "step": 16221 }, { "epoch": 1.6650246305418719, "grad_norm": 0.09619265049695969, "learning_rate": 0.01, "loss": 2.0399, "step": 16224 }, { "epoch": 1.665332512315271, "grad_norm": 0.04012330621480942, "learning_rate": 0.01, "loss": 2.044, "step": 16227 }, { "epoch": 1.66564039408867, "grad_norm": 0.062126293778419495, "learning_rate": 0.01, "loss": 2.0726, "step": 16230 }, { "epoch": 1.665948275862069, "grad_norm": 0.050277624279260635, "learning_rate": 0.01, "loss": 2.0219, "step": 16233 }, { "epoch": 1.666256157635468, "grad_norm": 0.03983129933476448, "learning_rate": 0.01, "loss": 2.0554, "step": 16236 }, { "epoch": 1.666564039408867, "grad_norm": 0.13119915127754211, "learning_rate": 0.01, "loss": 2.0682, "step": 16239 }, { "epoch": 1.666871921182266, "grad_norm": 0.0525536946952343, "learning_rate": 0.01, "loss": 2.0524, "step": 16242 }, { "epoch": 1.667179802955665, "grad_norm": 0.056762780994176865, "learning_rate": 0.01, "loss": 2.0293, "step": 16245 }, { "epoch": 1.667487684729064, "grad_norm": 0.08652041852474213, "learning_rate": 0.01, "loss": 2.0574, "step": 16248 }, { "epoch": 1.667795566502463, "grad_norm": 0.14455944299697876, "learning_rate": 0.01, "loss": 2.0406, "step": 16251 }, { "epoch": 1.668103448275862, "grad_norm": 0.03951118513941765, "learning_rate": 0.01, "loss": 2.0368, "step": 16254 }, { "epoch": 1.6684113300492611, "grad_norm": 0.040585123002529144, "learning_rate": 0.01, "loss": 2.017, "step": 16257 }, { "epoch": 1.6687192118226601, "grad_norm": 0.05393810570240021, "learning_rate": 0.01, "loss": 2.0679, "step": 16260 }, { "epoch": 1.6690270935960592, "grad_norm": 0.050093088299036026, "learning_rate": 0.01, "loss": 2.0546, "step": 16263 }, { "epoch": 1.6693349753694582, "grad_norm": 0.04196159914135933, "learning_rate": 0.01, "loss": 2.0488, "step": 16266 }, { "epoch": 1.6696428571428572, "grad_norm": 0.03978092968463898, "learning_rate": 0.01, "loss": 2.0453, "step": 16269 }, { "epoch": 1.6699507389162562, "grad_norm": 0.05054232105612755, "learning_rate": 0.01, "loss": 2.0337, "step": 16272 }, { "epoch": 1.6702586206896552, "grad_norm": 0.0746975764632225, "learning_rate": 0.01, "loss": 2.0636, "step": 16275 }, { "epoch": 1.6705665024630543, "grad_norm": 0.05685516446828842, "learning_rate": 0.01, "loss": 2.0591, "step": 16278 }, { "epoch": 1.6708743842364533, "grad_norm": 0.031971871852874756, "learning_rate": 0.01, "loss": 2.0657, "step": 16281 }, { "epoch": 1.6711822660098523, "grad_norm": 0.03947863727807999, "learning_rate": 0.01, "loss": 2.0333, "step": 16284 }, { "epoch": 1.6714901477832513, "grad_norm": 0.11271070688962936, "learning_rate": 0.01, "loss": 2.0421, "step": 16287 }, { "epoch": 1.6717980295566504, "grad_norm": 0.05308755114674568, "learning_rate": 0.01, "loss": 2.0536, "step": 16290 }, { "epoch": 1.6721059113300494, "grad_norm": 0.042826078832149506, "learning_rate": 0.01, "loss": 2.0694, "step": 16293 }, { "epoch": 1.6724137931034484, "grad_norm": 0.0458630695939064, "learning_rate": 0.01, "loss": 2.0312, "step": 16296 }, { "epoch": 1.6727216748768474, "grad_norm": 0.05401900038123131, "learning_rate": 0.01, "loss": 2.0613, "step": 16299 }, { "epoch": 1.6730295566502464, "grad_norm": 0.05380195751786232, "learning_rate": 0.01, "loss": 2.0336, "step": 16302 }, { "epoch": 1.6733374384236455, "grad_norm": 0.038716450333595276, "learning_rate": 0.01, "loss": 2.0548, "step": 16305 }, { "epoch": 1.6736453201970445, "grad_norm": 0.04034694656729698, "learning_rate": 0.01, "loss": 2.0421, "step": 16308 }, { "epoch": 1.6739532019704435, "grad_norm": 0.06753403693437576, "learning_rate": 0.01, "loss": 2.0324, "step": 16311 }, { "epoch": 1.6742610837438425, "grad_norm": 0.10001173615455627, "learning_rate": 0.01, "loss": 2.0467, "step": 16314 }, { "epoch": 1.6745689655172413, "grad_norm": 0.04366351664066315, "learning_rate": 0.01, "loss": 2.0622, "step": 16317 }, { "epoch": 1.6748768472906403, "grad_norm": 0.07137630879878998, "learning_rate": 0.01, "loss": 2.0333, "step": 16320 }, { "epoch": 1.6751847290640394, "grad_norm": 0.049938492476940155, "learning_rate": 0.01, "loss": 2.0426, "step": 16323 }, { "epoch": 1.6754926108374384, "grad_norm": 0.03337172046303749, "learning_rate": 0.01, "loss": 2.0462, "step": 16326 }, { "epoch": 1.6758004926108374, "grad_norm": 0.07407473772764206, "learning_rate": 0.01, "loss": 2.0705, "step": 16329 }, { "epoch": 1.6761083743842364, "grad_norm": 0.07006946206092834, "learning_rate": 0.01, "loss": 2.0383, "step": 16332 }, { "epoch": 1.6764162561576355, "grad_norm": 0.05342825874686241, "learning_rate": 0.01, "loss": 2.0481, "step": 16335 }, { "epoch": 1.6767241379310345, "grad_norm": 0.052405234426259995, "learning_rate": 0.01, "loss": 2.0138, "step": 16338 }, { "epoch": 1.6770320197044335, "grad_norm": 0.20231324434280396, "learning_rate": 0.01, "loss": 2.0472, "step": 16341 }, { "epoch": 1.6773399014778325, "grad_norm": 0.07893595844507217, "learning_rate": 0.01, "loss": 2.0415, "step": 16344 }, { "epoch": 1.6776477832512315, "grad_norm": 0.06872416287660599, "learning_rate": 0.01, "loss": 2.0376, "step": 16347 }, { "epoch": 1.6779556650246306, "grad_norm": 0.041687123477458954, "learning_rate": 0.01, "loss": 2.0442, "step": 16350 }, { "epoch": 1.6782635467980296, "grad_norm": 0.04184873029589653, "learning_rate": 0.01, "loss": 2.0769, "step": 16353 }, { "epoch": 1.6785714285714286, "grad_norm": 0.036598458886146545, "learning_rate": 0.01, "loss": 2.0255, "step": 16356 }, { "epoch": 1.6788793103448276, "grad_norm": 0.062203384935855865, "learning_rate": 0.01, "loss": 2.0582, "step": 16359 }, { "epoch": 1.6791871921182266, "grad_norm": 0.04513971135020256, "learning_rate": 0.01, "loss": 2.0475, "step": 16362 }, { "epoch": 1.6794950738916257, "grad_norm": 0.043875399976968765, "learning_rate": 0.01, "loss": 2.0455, "step": 16365 }, { "epoch": 1.6798029556650245, "grad_norm": 0.030207300558686256, "learning_rate": 0.01, "loss": 2.0733, "step": 16368 }, { "epoch": 1.6801108374384235, "grad_norm": 0.07749854028224945, "learning_rate": 0.01, "loss": 2.0402, "step": 16371 }, { "epoch": 1.6804187192118225, "grad_norm": 0.10269973427057266, "learning_rate": 0.01, "loss": 2.0342, "step": 16374 }, { "epoch": 1.6807266009852215, "grad_norm": 0.043558500707149506, "learning_rate": 0.01, "loss": 2.0429, "step": 16377 }, { "epoch": 1.6810344827586206, "grad_norm": 0.0490686409175396, "learning_rate": 0.01, "loss": 2.0381, "step": 16380 }, { "epoch": 1.6813423645320196, "grad_norm": 0.062107689678668976, "learning_rate": 0.01, "loss": 2.0592, "step": 16383 }, { "epoch": 1.6816502463054186, "grad_norm": 0.0856776013970375, "learning_rate": 0.01, "loss": 2.0666, "step": 16386 }, { "epoch": 1.6819581280788176, "grad_norm": 0.11694356054067612, "learning_rate": 0.01, "loss": 2.0545, "step": 16389 }, { "epoch": 1.6822660098522166, "grad_norm": 0.07279752194881439, "learning_rate": 0.01, "loss": 2.0348, "step": 16392 }, { "epoch": 1.6825738916256157, "grad_norm": 0.06813056766986847, "learning_rate": 0.01, "loss": 2.0549, "step": 16395 }, { "epoch": 1.6828817733990147, "grad_norm": 0.045916907489299774, "learning_rate": 0.01, "loss": 2.0714, "step": 16398 }, { "epoch": 1.6831896551724137, "grad_norm": 0.04464447498321533, "learning_rate": 0.01, "loss": 2.0655, "step": 16401 }, { "epoch": 1.6834975369458127, "grad_norm": 0.04815223440527916, "learning_rate": 0.01, "loss": 2.0633, "step": 16404 }, { "epoch": 1.6838054187192117, "grad_norm": 0.06025001034140587, "learning_rate": 0.01, "loss": 2.0325, "step": 16407 }, { "epoch": 1.6841133004926108, "grad_norm": 0.05691540613770485, "learning_rate": 0.01, "loss": 2.07, "step": 16410 }, { "epoch": 1.6844211822660098, "grad_norm": 0.04643694683909416, "learning_rate": 0.01, "loss": 2.0478, "step": 16413 }, { "epoch": 1.6847290640394088, "grad_norm": 0.03540325164794922, "learning_rate": 0.01, "loss": 2.0739, "step": 16416 }, { "epoch": 1.6850369458128078, "grad_norm": 0.034472569823265076, "learning_rate": 0.01, "loss": 2.0441, "step": 16419 }, { "epoch": 1.6853448275862069, "grad_norm": 0.04316902533173561, "learning_rate": 0.01, "loss": 2.0422, "step": 16422 }, { "epoch": 1.6856527093596059, "grad_norm": 0.04943558946251869, "learning_rate": 0.01, "loss": 2.0377, "step": 16425 }, { "epoch": 1.685960591133005, "grad_norm": 0.11482315510511398, "learning_rate": 0.01, "loss": 2.0668, "step": 16428 }, { "epoch": 1.686268472906404, "grad_norm": 0.10594377666711807, "learning_rate": 0.01, "loss": 2.0513, "step": 16431 }, { "epoch": 1.686576354679803, "grad_norm": 0.09860610961914062, "learning_rate": 0.01, "loss": 2.0456, "step": 16434 }, { "epoch": 1.686884236453202, "grad_norm": 0.06849053502082825, "learning_rate": 0.01, "loss": 2.0645, "step": 16437 }, { "epoch": 1.687192118226601, "grad_norm": 0.05089464411139488, "learning_rate": 0.01, "loss": 2.0383, "step": 16440 }, { "epoch": 1.6875, "grad_norm": 0.04762034863233566, "learning_rate": 0.01, "loss": 2.0443, "step": 16443 }, { "epoch": 1.687807881773399, "grad_norm": 0.09014497697353363, "learning_rate": 0.01, "loss": 2.0736, "step": 16446 }, { "epoch": 1.688115763546798, "grad_norm": 0.06832917779684067, "learning_rate": 0.01, "loss": 2.0677, "step": 16449 }, { "epoch": 1.688423645320197, "grad_norm": 0.0529920794069767, "learning_rate": 0.01, "loss": 2.0423, "step": 16452 }, { "epoch": 1.688731527093596, "grad_norm": 0.03208652511239052, "learning_rate": 0.01, "loss": 2.0561, "step": 16455 }, { "epoch": 1.689039408866995, "grad_norm": 0.13702784478664398, "learning_rate": 0.01, "loss": 2.0393, "step": 16458 }, { "epoch": 1.6893472906403941, "grad_norm": 0.05972970649600029, "learning_rate": 0.01, "loss": 2.0795, "step": 16461 }, { "epoch": 1.6896551724137931, "grad_norm": 0.043536797165870667, "learning_rate": 0.01, "loss": 2.0622, "step": 16464 }, { "epoch": 1.6899630541871922, "grad_norm": 0.0556536540389061, "learning_rate": 0.01, "loss": 2.0523, "step": 16467 }, { "epoch": 1.6902709359605912, "grad_norm": 0.06583042442798615, "learning_rate": 0.01, "loss": 2.0568, "step": 16470 }, { "epoch": 1.6905788177339902, "grad_norm": 0.0535028837621212, "learning_rate": 0.01, "loss": 2.0427, "step": 16473 }, { "epoch": 1.6908866995073892, "grad_norm": 0.09974632412195206, "learning_rate": 0.01, "loss": 2.0589, "step": 16476 }, { "epoch": 1.6911945812807883, "grad_norm": 0.058350350707769394, "learning_rate": 0.01, "loss": 2.0392, "step": 16479 }, { "epoch": 1.6915024630541873, "grad_norm": 0.10049036890268326, "learning_rate": 0.01, "loss": 2.0643, "step": 16482 }, { "epoch": 1.6918103448275863, "grad_norm": 0.061119675636291504, "learning_rate": 0.01, "loss": 2.0455, "step": 16485 }, { "epoch": 1.6921182266009853, "grad_norm": 0.07189033925533295, "learning_rate": 0.01, "loss": 2.0629, "step": 16488 }, { "epoch": 1.6924261083743843, "grad_norm": 0.08962611109018326, "learning_rate": 0.01, "loss": 2.0586, "step": 16491 }, { "epoch": 1.6927339901477834, "grad_norm": 0.05600450560450554, "learning_rate": 0.01, "loss": 2.0434, "step": 16494 }, { "epoch": 1.6930418719211824, "grad_norm": 0.1281098574399948, "learning_rate": 0.01, "loss": 2.0241, "step": 16497 }, { "epoch": 1.6933497536945814, "grad_norm": 0.036696117371320724, "learning_rate": 0.01, "loss": 2.065, "step": 16500 }, { "epoch": 1.6936576354679804, "grad_norm": 0.12428770959377289, "learning_rate": 0.01, "loss": 2.0479, "step": 16503 }, { "epoch": 1.6939655172413794, "grad_norm": 0.07593953609466553, "learning_rate": 0.01, "loss": 2.0357, "step": 16506 }, { "epoch": 1.6942733990147785, "grad_norm": 0.0686376765370369, "learning_rate": 0.01, "loss": 2.05, "step": 16509 }, { "epoch": 1.6945812807881775, "grad_norm": 0.044805269688367844, "learning_rate": 0.01, "loss": 2.0514, "step": 16512 }, { "epoch": 1.6948891625615765, "grad_norm": 0.04698259010910988, "learning_rate": 0.01, "loss": 2.0505, "step": 16515 }, { "epoch": 1.6951970443349755, "grad_norm": 0.04546966403722763, "learning_rate": 0.01, "loss": 2.0265, "step": 16518 }, { "epoch": 1.6955049261083743, "grad_norm": 0.07239431142807007, "learning_rate": 0.01, "loss": 2.0403, "step": 16521 }, { "epoch": 1.6958128078817734, "grad_norm": 0.08790195733308792, "learning_rate": 0.01, "loss": 2.0721, "step": 16524 }, { "epoch": 1.6961206896551724, "grad_norm": 0.05445432290434837, "learning_rate": 0.01, "loss": 2.039, "step": 16527 }, { "epoch": 1.6964285714285714, "grad_norm": 0.048141270875930786, "learning_rate": 0.01, "loss": 2.0191, "step": 16530 }, { "epoch": 1.6967364532019704, "grad_norm": 0.05230564624071121, "learning_rate": 0.01, "loss": 2.0646, "step": 16533 }, { "epoch": 1.6970443349753694, "grad_norm": 0.1007009968161583, "learning_rate": 0.01, "loss": 2.0751, "step": 16536 }, { "epoch": 1.6973522167487685, "grad_norm": 0.03878286853432655, "learning_rate": 0.01, "loss": 2.0257, "step": 16539 }, { "epoch": 1.6976600985221675, "grad_norm": 0.08503543585538864, "learning_rate": 0.01, "loss": 2.0516, "step": 16542 }, { "epoch": 1.6979679802955665, "grad_norm": 0.06239473819732666, "learning_rate": 0.01, "loss": 2.057, "step": 16545 }, { "epoch": 1.6982758620689655, "grad_norm": 0.06893055140972137, "learning_rate": 0.01, "loss": 2.0435, "step": 16548 }, { "epoch": 1.6985837438423645, "grad_norm": 0.08434829860925674, "learning_rate": 0.01, "loss": 2.0319, "step": 16551 }, { "epoch": 1.6988916256157636, "grad_norm": 0.031773362308740616, "learning_rate": 0.01, "loss": 2.0585, "step": 16554 }, { "epoch": 1.6991995073891626, "grad_norm": 0.11598584800958633, "learning_rate": 0.01, "loss": 2.0423, "step": 16557 }, { "epoch": 1.6995073891625616, "grad_norm": 0.07008111476898193, "learning_rate": 0.01, "loss": 2.0787, "step": 16560 }, { "epoch": 1.6998152709359606, "grad_norm": 0.03940622881054878, "learning_rate": 0.01, "loss": 2.0525, "step": 16563 }, { "epoch": 1.7001231527093597, "grad_norm": 0.05206933617591858, "learning_rate": 0.01, "loss": 2.0671, "step": 16566 }, { "epoch": 1.7004310344827587, "grad_norm": 0.04568307474255562, "learning_rate": 0.01, "loss": 2.0413, "step": 16569 }, { "epoch": 1.7007389162561575, "grad_norm": 0.031628433614969254, "learning_rate": 0.01, "loss": 2.0323, "step": 16572 }, { "epoch": 1.7010467980295565, "grad_norm": 0.05636722221970558, "learning_rate": 0.01, "loss": 2.0403, "step": 16575 }, { "epoch": 1.7013546798029555, "grad_norm": 0.11134552955627441, "learning_rate": 0.01, "loss": 2.034, "step": 16578 }, { "epoch": 1.7016625615763545, "grad_norm": 0.06964823603630066, "learning_rate": 0.01, "loss": 2.0701, "step": 16581 }, { "epoch": 1.7019704433497536, "grad_norm": 0.041148003190755844, "learning_rate": 0.01, "loss": 2.0693, "step": 16584 }, { "epoch": 1.7022783251231526, "grad_norm": 0.03673578426241875, "learning_rate": 0.01, "loss": 2.0232, "step": 16587 }, { "epoch": 1.7025862068965516, "grad_norm": 0.03659043833613396, "learning_rate": 0.01, "loss": 2.0257, "step": 16590 }, { "epoch": 1.7028940886699506, "grad_norm": 0.03824566677212715, "learning_rate": 0.01, "loss": 2.0587, "step": 16593 }, { "epoch": 1.7032019704433496, "grad_norm": 0.07334180176258087, "learning_rate": 0.01, "loss": 2.0626, "step": 16596 }, { "epoch": 1.7035098522167487, "grad_norm": 0.055927857756614685, "learning_rate": 0.01, "loss": 2.0311, "step": 16599 }, { "epoch": 1.7038177339901477, "grad_norm": 0.07610691338777542, "learning_rate": 0.01, "loss": 2.0419, "step": 16602 }, { "epoch": 1.7041256157635467, "grad_norm": 0.06405298411846161, "learning_rate": 0.01, "loss": 2.0693, "step": 16605 }, { "epoch": 1.7044334975369457, "grad_norm": 0.06193486601114273, "learning_rate": 0.01, "loss": 2.0442, "step": 16608 }, { "epoch": 1.7047413793103448, "grad_norm": 0.12181366235017776, "learning_rate": 0.01, "loss": 2.0324, "step": 16611 }, { "epoch": 1.7050492610837438, "grad_norm": 0.049060508608818054, "learning_rate": 0.01, "loss": 2.044, "step": 16614 }, { "epoch": 1.7053571428571428, "grad_norm": 0.05021090805530548, "learning_rate": 0.01, "loss": 2.0501, "step": 16617 }, { "epoch": 1.7056650246305418, "grad_norm": 0.045171257108449936, "learning_rate": 0.01, "loss": 2.0418, "step": 16620 }, { "epoch": 1.7059729064039408, "grad_norm": 0.04944808408617973, "learning_rate": 0.01, "loss": 2.0576, "step": 16623 }, { "epoch": 1.7062807881773399, "grad_norm": 0.03556932508945465, "learning_rate": 0.01, "loss": 2.0405, "step": 16626 }, { "epoch": 1.7065886699507389, "grad_norm": 0.10005172342061996, "learning_rate": 0.01, "loss": 2.0422, "step": 16629 }, { "epoch": 1.706896551724138, "grad_norm": 0.04088572412729263, "learning_rate": 0.01, "loss": 2.0526, "step": 16632 }, { "epoch": 1.707204433497537, "grad_norm": 0.04937949404120445, "learning_rate": 0.01, "loss": 2.0656, "step": 16635 }, { "epoch": 1.707512315270936, "grad_norm": 0.07822302728891373, "learning_rate": 0.01, "loss": 2.0545, "step": 16638 }, { "epoch": 1.707820197044335, "grad_norm": 0.05767158418893814, "learning_rate": 0.01, "loss": 2.0515, "step": 16641 }, { "epoch": 1.708128078817734, "grad_norm": 0.08512212336063385, "learning_rate": 0.01, "loss": 2.0579, "step": 16644 }, { "epoch": 1.708435960591133, "grad_norm": 0.06758993119001389, "learning_rate": 0.01, "loss": 2.0307, "step": 16647 }, { "epoch": 1.708743842364532, "grad_norm": 0.08142005652189255, "learning_rate": 0.01, "loss": 2.0531, "step": 16650 }, { "epoch": 1.709051724137931, "grad_norm": 0.06357218325138092, "learning_rate": 0.01, "loss": 2.0475, "step": 16653 }, { "epoch": 1.70935960591133, "grad_norm": 0.10591546446084976, "learning_rate": 0.01, "loss": 2.0447, "step": 16656 }, { "epoch": 1.709667487684729, "grad_norm": 0.06571496278047562, "learning_rate": 0.01, "loss": 2.0176, "step": 16659 }, { "epoch": 1.7099753694581281, "grad_norm": 0.049450814723968506, "learning_rate": 0.01, "loss": 2.0522, "step": 16662 }, { "epoch": 1.7102832512315271, "grad_norm": 0.11850273609161377, "learning_rate": 0.01, "loss": 2.0575, "step": 16665 }, { "epoch": 1.7105911330049262, "grad_norm": 0.0952281728386879, "learning_rate": 0.01, "loss": 2.0442, "step": 16668 }, { "epoch": 1.7108990147783252, "grad_norm": 0.09431217610836029, "learning_rate": 0.01, "loss": 2.0403, "step": 16671 }, { "epoch": 1.7112068965517242, "grad_norm": 0.07080823183059692, "learning_rate": 0.01, "loss": 2.0111, "step": 16674 }, { "epoch": 1.7115147783251232, "grad_norm": 0.049033813178539276, "learning_rate": 0.01, "loss": 2.0489, "step": 16677 }, { "epoch": 1.7118226600985222, "grad_norm": 0.04356718435883522, "learning_rate": 0.01, "loss": 2.035, "step": 16680 }, { "epoch": 1.7121305418719213, "grad_norm": 0.03276592493057251, "learning_rate": 0.01, "loss": 2.0396, "step": 16683 }, { "epoch": 1.7124384236453203, "grad_norm": 0.04438839852809906, "learning_rate": 0.01, "loss": 2.0441, "step": 16686 }, { "epoch": 1.7127463054187193, "grad_norm": 0.07276454567909241, "learning_rate": 0.01, "loss": 2.0513, "step": 16689 }, { "epoch": 1.7130541871921183, "grad_norm": 0.11324001848697662, "learning_rate": 0.01, "loss": 2.0482, "step": 16692 }, { "epoch": 1.7133620689655173, "grad_norm": 0.14715081453323364, "learning_rate": 0.01, "loss": 2.048, "step": 16695 }, { "epoch": 1.7136699507389164, "grad_norm": 0.07661852240562439, "learning_rate": 0.01, "loss": 2.0396, "step": 16698 }, { "epoch": 1.7139778325123154, "grad_norm": 0.05308947339653969, "learning_rate": 0.01, "loss": 2.0601, "step": 16701 }, { "epoch": 1.7142857142857144, "grad_norm": 0.06816977262496948, "learning_rate": 0.01, "loss": 2.0456, "step": 16704 }, { "epoch": 1.7145935960591134, "grad_norm": 0.05123249441385269, "learning_rate": 0.01, "loss": 2.0532, "step": 16707 }, { "epoch": 1.7149014778325125, "grad_norm": 0.05118009075522423, "learning_rate": 0.01, "loss": 2.0659, "step": 16710 }, { "epoch": 1.7152093596059115, "grad_norm": 0.03276235982775688, "learning_rate": 0.01, "loss": 2.0718, "step": 16713 }, { "epoch": 1.7155172413793105, "grad_norm": 0.049824655055999756, "learning_rate": 0.01, "loss": 2.0388, "step": 16716 }, { "epoch": 1.7158251231527095, "grad_norm": 0.1416471302509308, "learning_rate": 0.01, "loss": 2.0526, "step": 16719 }, { "epoch": 1.7161330049261085, "grad_norm": 0.04109251871705055, "learning_rate": 0.01, "loss": 2.0329, "step": 16722 }, { "epoch": 1.7164408866995073, "grad_norm": 0.08853971213102341, "learning_rate": 0.01, "loss": 2.0506, "step": 16725 }, { "epoch": 1.7167487684729064, "grad_norm": 0.05180136114358902, "learning_rate": 0.01, "loss": 2.0608, "step": 16728 }, { "epoch": 1.7170566502463054, "grad_norm": 0.0667758584022522, "learning_rate": 0.01, "loss": 2.0347, "step": 16731 }, { "epoch": 1.7173645320197044, "grad_norm": 0.039203155785799026, "learning_rate": 0.01, "loss": 2.0331, "step": 16734 }, { "epoch": 1.7176724137931034, "grad_norm": 0.05210564285516739, "learning_rate": 0.01, "loss": 2.0666, "step": 16737 }, { "epoch": 1.7179802955665024, "grad_norm": 0.0668390691280365, "learning_rate": 0.01, "loss": 2.0365, "step": 16740 }, { "epoch": 1.7182881773399015, "grad_norm": 0.05041831359267235, "learning_rate": 0.01, "loss": 2.0261, "step": 16743 }, { "epoch": 1.7185960591133005, "grad_norm": 0.04496284946799278, "learning_rate": 0.01, "loss": 2.0182, "step": 16746 }, { "epoch": 1.7189039408866995, "grad_norm": 0.08660906553268433, "learning_rate": 0.01, "loss": 2.0434, "step": 16749 }, { "epoch": 1.7192118226600985, "grad_norm": 0.054843079298734665, "learning_rate": 0.01, "loss": 2.0522, "step": 16752 }, { "epoch": 1.7195197044334976, "grad_norm": 0.05377354100346565, "learning_rate": 0.01, "loss": 2.0263, "step": 16755 }, { "epoch": 1.7198275862068966, "grad_norm": 0.060547634959220886, "learning_rate": 0.01, "loss": 2.0165, "step": 16758 }, { "epoch": 1.7201354679802956, "grad_norm": 0.06253104656934738, "learning_rate": 0.01, "loss": 2.0479, "step": 16761 }, { "epoch": 1.7204433497536946, "grad_norm": 0.052225805819034576, "learning_rate": 0.01, "loss": 2.03, "step": 16764 }, { "epoch": 1.7207512315270936, "grad_norm": 0.04988449066877365, "learning_rate": 0.01, "loss": 2.0498, "step": 16767 }, { "epoch": 1.7210591133004927, "grad_norm": 0.049726665019989014, "learning_rate": 0.01, "loss": 2.0588, "step": 16770 }, { "epoch": 1.7213669950738915, "grad_norm": 0.05053321272134781, "learning_rate": 0.01, "loss": 2.0518, "step": 16773 }, { "epoch": 1.7216748768472905, "grad_norm": 0.060702208429574966, "learning_rate": 0.01, "loss": 2.0448, "step": 16776 }, { "epoch": 1.7219827586206895, "grad_norm": 0.06695824861526489, "learning_rate": 0.01, "loss": 2.0361, "step": 16779 }, { "epoch": 1.7222906403940885, "grad_norm": 0.07498425990343094, "learning_rate": 0.01, "loss": 2.0652, "step": 16782 }, { "epoch": 1.7225985221674875, "grad_norm": 0.04544251784682274, "learning_rate": 0.01, "loss": 2.0294, "step": 16785 }, { "epoch": 1.7229064039408866, "grad_norm": 0.09211461246013641, "learning_rate": 0.01, "loss": 2.0261, "step": 16788 }, { "epoch": 1.7232142857142856, "grad_norm": 0.19020068645477295, "learning_rate": 0.01, "loss": 2.0459, "step": 16791 }, { "epoch": 1.7235221674876846, "grad_norm": 0.08979224413633347, "learning_rate": 0.01, "loss": 2.052, "step": 16794 }, { "epoch": 1.7238300492610836, "grad_norm": 0.06724744290113449, "learning_rate": 0.01, "loss": 2.0463, "step": 16797 }, { "epoch": 1.7241379310344827, "grad_norm": 0.05009309947490692, "learning_rate": 0.01, "loss": 2.0338, "step": 16800 }, { "epoch": 1.7244458128078817, "grad_norm": 0.04953375831246376, "learning_rate": 0.01, "loss": 2.0554, "step": 16803 }, { "epoch": 1.7247536945812807, "grad_norm": 0.12439639121294022, "learning_rate": 0.01, "loss": 2.0363, "step": 16806 }, { "epoch": 1.7250615763546797, "grad_norm": 0.126046821475029, "learning_rate": 0.01, "loss": 2.0435, "step": 16809 }, { "epoch": 1.7253694581280787, "grad_norm": 0.06693071871995926, "learning_rate": 0.01, "loss": 2.058, "step": 16812 }, { "epoch": 1.7256773399014778, "grad_norm": 0.04883793368935585, "learning_rate": 0.01, "loss": 2.0768, "step": 16815 }, { "epoch": 1.7259852216748768, "grad_norm": 0.06895219534635544, "learning_rate": 0.01, "loss": 2.057, "step": 16818 }, { "epoch": 1.7262931034482758, "grad_norm": 0.06979874521493912, "learning_rate": 0.01, "loss": 2.0641, "step": 16821 }, { "epoch": 1.7266009852216748, "grad_norm": 0.06453370302915573, "learning_rate": 0.01, "loss": 2.025, "step": 16824 }, { "epoch": 1.7269088669950738, "grad_norm": 0.11405997723340988, "learning_rate": 0.01, "loss": 2.0469, "step": 16827 }, { "epoch": 1.7272167487684729, "grad_norm": 0.10612863302230835, "learning_rate": 0.01, "loss": 2.0825, "step": 16830 }, { "epoch": 1.7275246305418719, "grad_norm": 0.09805281460285187, "learning_rate": 0.01, "loss": 2.0256, "step": 16833 }, { "epoch": 1.727832512315271, "grad_norm": 0.08369138836860657, "learning_rate": 0.01, "loss": 2.0307, "step": 16836 }, { "epoch": 1.72814039408867, "grad_norm": 0.08893299847841263, "learning_rate": 0.01, "loss": 2.0362, "step": 16839 }, { "epoch": 1.728448275862069, "grad_norm": 0.06902442872524261, "learning_rate": 0.01, "loss": 2.0562, "step": 16842 }, { "epoch": 1.728756157635468, "grad_norm": 0.058703918009996414, "learning_rate": 0.01, "loss": 2.0406, "step": 16845 }, { "epoch": 1.729064039408867, "grad_norm": 0.03979343920946121, "learning_rate": 0.01, "loss": 2.0636, "step": 16848 }, { "epoch": 1.729371921182266, "grad_norm": 0.08049800246953964, "learning_rate": 0.01, "loss": 2.0483, "step": 16851 }, { "epoch": 1.729679802955665, "grad_norm": 0.07980278134346008, "learning_rate": 0.01, "loss": 2.0471, "step": 16854 }, { "epoch": 1.729987684729064, "grad_norm": 0.051650844514369965, "learning_rate": 0.01, "loss": 2.0322, "step": 16857 }, { "epoch": 1.730295566502463, "grad_norm": 0.06828897446393967, "learning_rate": 0.01, "loss": 2.0456, "step": 16860 }, { "epoch": 1.730603448275862, "grad_norm": 0.10987185686826706, "learning_rate": 0.01, "loss": 2.0334, "step": 16863 }, { "epoch": 1.7309113300492611, "grad_norm": 0.055677276104688644, "learning_rate": 0.01, "loss": 2.018, "step": 16866 }, { "epoch": 1.7312192118226601, "grad_norm": 0.056478437036275864, "learning_rate": 0.01, "loss": 2.0241, "step": 16869 }, { "epoch": 1.7315270935960592, "grad_norm": 0.1247091144323349, "learning_rate": 0.01, "loss": 2.0128, "step": 16872 }, { "epoch": 1.7318349753694582, "grad_norm": 0.06603918969631195, "learning_rate": 0.01, "loss": 2.0456, "step": 16875 }, { "epoch": 1.7321428571428572, "grad_norm": 0.04174170270562172, "learning_rate": 0.01, "loss": 2.0349, "step": 16878 }, { "epoch": 1.7324507389162562, "grad_norm": 0.03841250762343407, "learning_rate": 0.01, "loss": 2.0365, "step": 16881 }, { "epoch": 1.7327586206896552, "grad_norm": 0.03429241105914116, "learning_rate": 0.01, "loss": 2.038, "step": 16884 }, { "epoch": 1.7330665024630543, "grad_norm": 0.05175672471523285, "learning_rate": 0.01, "loss": 2.0583, "step": 16887 }, { "epoch": 1.7333743842364533, "grad_norm": 0.06318958103656769, "learning_rate": 0.01, "loss": 2.0398, "step": 16890 }, { "epoch": 1.7336822660098523, "grad_norm": 0.08888188004493713, "learning_rate": 0.01, "loss": 2.0447, "step": 16893 }, { "epoch": 1.7339901477832513, "grad_norm": 0.04479747265577316, "learning_rate": 0.01, "loss": 2.0671, "step": 16896 }, { "epoch": 1.7342980295566504, "grad_norm": 0.05286455899477005, "learning_rate": 0.01, "loss": 2.0342, "step": 16899 }, { "epoch": 1.7346059113300494, "grad_norm": 0.04719952121376991, "learning_rate": 0.01, "loss": 2.0251, "step": 16902 }, { "epoch": 1.7349137931034484, "grad_norm": 0.06204066798090935, "learning_rate": 0.01, "loss": 2.0412, "step": 16905 }, { "epoch": 1.7352216748768474, "grad_norm": 0.1099078357219696, "learning_rate": 0.01, "loss": 2.008, "step": 16908 }, { "epoch": 1.7355295566502464, "grad_norm": 0.08311746269464493, "learning_rate": 0.01, "loss": 2.0248, "step": 16911 }, { "epoch": 1.7358374384236455, "grad_norm": 0.09649046510457993, "learning_rate": 0.01, "loss": 2.0585, "step": 16914 }, { "epoch": 1.7361453201970445, "grad_norm": 0.06716254353523254, "learning_rate": 0.01, "loss": 2.0128, "step": 16917 }, { "epoch": 1.7364532019704435, "grad_norm": 0.05400918424129486, "learning_rate": 0.01, "loss": 2.035, "step": 16920 }, { "epoch": 1.7367610837438425, "grad_norm": 0.06857965141534805, "learning_rate": 0.01, "loss": 2.0394, "step": 16923 }, { "epoch": 1.7370689655172413, "grad_norm": 0.05842095986008644, "learning_rate": 0.01, "loss": 2.0418, "step": 16926 }, { "epoch": 1.7373768472906403, "grad_norm": 0.06279005855321884, "learning_rate": 0.01, "loss": 2.0528, "step": 16929 }, { "epoch": 1.7376847290640394, "grad_norm": 0.04209805652499199, "learning_rate": 0.01, "loss": 2.0374, "step": 16932 }, { "epoch": 1.7379926108374384, "grad_norm": 0.07557601481676102, "learning_rate": 0.01, "loss": 2.0351, "step": 16935 }, { "epoch": 1.7383004926108374, "grad_norm": 0.05998634174466133, "learning_rate": 0.01, "loss": 2.0283, "step": 16938 }, { "epoch": 1.7386083743842364, "grad_norm": 0.05477887764573097, "learning_rate": 0.01, "loss": 2.0659, "step": 16941 }, { "epoch": 1.7389162561576355, "grad_norm": 0.09489591419696808, "learning_rate": 0.01, "loss": 2.0313, "step": 16944 }, { "epoch": 1.7392241379310345, "grad_norm": 0.13861320912837982, "learning_rate": 0.01, "loss": 2.0778, "step": 16947 }, { "epoch": 1.7395320197044335, "grad_norm": 0.1302078664302826, "learning_rate": 0.01, "loss": 2.024, "step": 16950 }, { "epoch": 1.7398399014778325, "grad_norm": 0.08335380256175995, "learning_rate": 0.01, "loss": 2.0612, "step": 16953 }, { "epoch": 1.7401477832512315, "grad_norm": 0.06901963800191879, "learning_rate": 0.01, "loss": 2.0357, "step": 16956 }, { "epoch": 1.7404556650246306, "grad_norm": 0.06619597226381302, "learning_rate": 0.01, "loss": 2.0207, "step": 16959 }, { "epoch": 1.7407635467980296, "grad_norm": 0.045459166169166565, "learning_rate": 0.01, "loss": 2.071, "step": 16962 }, { "epoch": 1.7410714285714286, "grad_norm": 0.06302090734243393, "learning_rate": 0.01, "loss": 2.0311, "step": 16965 }, { "epoch": 1.7413793103448276, "grad_norm": 0.07184530049562454, "learning_rate": 0.01, "loss": 2.0309, "step": 16968 }, { "epoch": 1.7416871921182266, "grad_norm": 0.07319154590368271, "learning_rate": 0.01, "loss": 2.0316, "step": 16971 }, { "epoch": 1.7419950738916257, "grad_norm": 0.0667872503399849, "learning_rate": 0.01, "loss": 2.0406, "step": 16974 }, { "epoch": 1.7423029556650245, "grad_norm": 0.0988057404756546, "learning_rate": 0.01, "loss": 2.0466, "step": 16977 }, { "epoch": 1.7426108374384235, "grad_norm": 0.09142420440912247, "learning_rate": 0.01, "loss": 2.0503, "step": 16980 }, { "epoch": 1.7429187192118225, "grad_norm": 0.07581423968076706, "learning_rate": 0.01, "loss": 2.0004, "step": 16983 }, { "epoch": 1.7432266009852215, "grad_norm": 0.05793863534927368, "learning_rate": 0.01, "loss": 2.0278, "step": 16986 }, { "epoch": 1.7435344827586206, "grad_norm": 0.0717446580529213, "learning_rate": 0.01, "loss": 2.061, "step": 16989 }, { "epoch": 1.7438423645320196, "grad_norm": 0.03572774678468704, "learning_rate": 0.01, "loss": 2.0663, "step": 16992 }, { "epoch": 1.7441502463054186, "grad_norm": 0.09349701553583145, "learning_rate": 0.01, "loss": 2.0433, "step": 16995 }, { "epoch": 1.7444581280788176, "grad_norm": 0.08374779671430588, "learning_rate": 0.01, "loss": 2.0228, "step": 16998 }, { "epoch": 1.7447660098522166, "grad_norm": 0.045029062777757645, "learning_rate": 0.01, "loss": 2.0425, "step": 17001 }, { "epoch": 1.7450738916256157, "grad_norm": 0.06359710544347763, "learning_rate": 0.01, "loss": 2.073, "step": 17004 }, { "epoch": 1.7453817733990147, "grad_norm": 0.0948299989104271, "learning_rate": 0.01, "loss": 2.0326, "step": 17007 }, { "epoch": 1.7456896551724137, "grad_norm": 0.05561600998044014, "learning_rate": 0.01, "loss": 2.0702, "step": 17010 }, { "epoch": 1.7459975369458127, "grad_norm": 0.05672406032681465, "learning_rate": 0.01, "loss": 2.0591, "step": 17013 }, { "epoch": 1.7463054187192117, "grad_norm": 0.047100841999053955, "learning_rate": 0.01, "loss": 2.0597, "step": 17016 }, { "epoch": 1.7466133004926108, "grad_norm": 0.09477028995752335, "learning_rate": 0.01, "loss": 2.046, "step": 17019 }, { "epoch": 1.7469211822660098, "grad_norm": 0.07235399633646011, "learning_rate": 0.01, "loss": 2.0778, "step": 17022 }, { "epoch": 1.7472290640394088, "grad_norm": 0.08015649020671844, "learning_rate": 0.01, "loss": 2.0219, "step": 17025 }, { "epoch": 1.7475369458128078, "grad_norm": 0.07459922134876251, "learning_rate": 0.01, "loss": 2.0278, "step": 17028 }, { "epoch": 1.7478448275862069, "grad_norm": 0.10745642334222794, "learning_rate": 0.01, "loss": 2.0536, "step": 17031 }, { "epoch": 1.7481527093596059, "grad_norm": 0.049479395151138306, "learning_rate": 0.01, "loss": 2.0477, "step": 17034 }, { "epoch": 1.748460591133005, "grad_norm": 0.03935602307319641, "learning_rate": 0.01, "loss": 2.0495, "step": 17037 }, { "epoch": 1.748768472906404, "grad_norm": 0.05755804106593132, "learning_rate": 0.01, "loss": 2.0186, "step": 17040 }, { "epoch": 1.749076354679803, "grad_norm": 0.07461614906787872, "learning_rate": 0.01, "loss": 2.0577, "step": 17043 }, { "epoch": 1.749384236453202, "grad_norm": 0.07078621536493301, "learning_rate": 0.01, "loss": 2.0159, "step": 17046 }, { "epoch": 1.749692118226601, "grad_norm": 0.12035417556762695, "learning_rate": 0.01, "loss": 2.0549, "step": 17049 }, { "epoch": 1.75, "grad_norm": 0.054343827068805695, "learning_rate": 0.01, "loss": 2.0394, "step": 17052 }, { "epoch": 1.750307881773399, "grad_norm": 0.056529540568590164, "learning_rate": 0.01, "loss": 2.0604, "step": 17055 }, { "epoch": 1.750615763546798, "grad_norm": 0.09392616152763367, "learning_rate": 0.01, "loss": 2.0233, "step": 17058 }, { "epoch": 1.750923645320197, "grad_norm": 0.0874391570687294, "learning_rate": 0.01, "loss": 2.0733, "step": 17061 }, { "epoch": 1.751231527093596, "grad_norm": 0.03889552876353264, "learning_rate": 0.01, "loss": 2.0099, "step": 17064 }, { "epoch": 1.751539408866995, "grad_norm": 0.06299902498722076, "learning_rate": 0.01, "loss": 2.01, "step": 17067 }, { "epoch": 1.7518472906403941, "grad_norm": 0.05655315890908241, "learning_rate": 0.01, "loss": 2.0571, "step": 17070 }, { "epoch": 1.7521551724137931, "grad_norm": 0.04646646976470947, "learning_rate": 0.01, "loss": 2.0401, "step": 17073 }, { "epoch": 1.7524630541871922, "grad_norm": 0.04910219460725784, "learning_rate": 0.01, "loss": 2.0527, "step": 17076 }, { "epoch": 1.7527709359605912, "grad_norm": 0.03616022691130638, "learning_rate": 0.01, "loss": 2.0152, "step": 17079 }, { "epoch": 1.7530788177339902, "grad_norm": 0.05539899319410324, "learning_rate": 0.01, "loss": 2.029, "step": 17082 }, { "epoch": 1.7533866995073892, "grad_norm": 0.09409206360578537, "learning_rate": 0.01, "loss": 2.0621, "step": 17085 }, { "epoch": 1.7536945812807883, "grad_norm": 0.048882581293582916, "learning_rate": 0.01, "loss": 2.057, "step": 17088 }, { "epoch": 1.7540024630541873, "grad_norm": 0.09687826037406921, "learning_rate": 0.01, "loss": 2.0472, "step": 17091 }, { "epoch": 1.7543103448275863, "grad_norm": 0.06646592915058136, "learning_rate": 0.01, "loss": 2.0469, "step": 17094 }, { "epoch": 1.7546182266009853, "grad_norm": 0.07316572964191437, "learning_rate": 0.01, "loss": 2.0427, "step": 17097 }, { "epoch": 1.7549261083743843, "grad_norm": 0.058037593960762024, "learning_rate": 0.01, "loss": 2.0462, "step": 17100 }, { "epoch": 1.7552339901477834, "grad_norm": 0.05486461892724037, "learning_rate": 0.01, "loss": 2.0426, "step": 17103 }, { "epoch": 1.7555418719211824, "grad_norm": 0.0397610180079937, "learning_rate": 0.01, "loss": 2.0397, "step": 17106 }, { "epoch": 1.7558497536945814, "grad_norm": 0.11639061570167542, "learning_rate": 0.01, "loss": 2.0248, "step": 17109 }, { "epoch": 1.7561576354679804, "grad_norm": 0.04207361862063408, "learning_rate": 0.01, "loss": 2.064, "step": 17112 }, { "epoch": 1.7564655172413794, "grad_norm": 0.09336013346910477, "learning_rate": 0.01, "loss": 2.0399, "step": 17115 }, { "epoch": 1.7567733990147785, "grad_norm": 0.059693820774555206, "learning_rate": 0.01, "loss": 2.0347, "step": 17118 }, { "epoch": 1.7570812807881775, "grad_norm": 0.05269778147339821, "learning_rate": 0.01, "loss": 2.0222, "step": 17121 }, { "epoch": 1.7573891625615765, "grad_norm": 0.0548628568649292, "learning_rate": 0.01, "loss": 2.0269, "step": 17124 }, { "epoch": 1.7576970443349755, "grad_norm": 0.049783483147621155, "learning_rate": 0.01, "loss": 2.0469, "step": 17127 }, { "epoch": 1.7580049261083743, "grad_norm": 0.11240525543689728, "learning_rate": 0.01, "loss": 2.0405, "step": 17130 }, { "epoch": 1.7583128078817734, "grad_norm": 0.04133368283510208, "learning_rate": 0.01, "loss": 2.064, "step": 17133 }, { "epoch": 1.7586206896551724, "grad_norm": 0.042926132678985596, "learning_rate": 0.01, "loss": 2.0685, "step": 17136 }, { "epoch": 1.7589285714285714, "grad_norm": 0.053613241761922836, "learning_rate": 0.01, "loss": 2.0241, "step": 17139 }, { "epoch": 1.7592364532019704, "grad_norm": 0.03950737044215202, "learning_rate": 0.01, "loss": 2.0662, "step": 17142 }, { "epoch": 1.7595443349753694, "grad_norm": 0.045378413051366806, "learning_rate": 0.01, "loss": 2.0401, "step": 17145 }, { "epoch": 1.7598522167487685, "grad_norm": 0.036304816603660583, "learning_rate": 0.01, "loss": 2.0789, "step": 17148 }, { "epoch": 1.7601600985221675, "grad_norm": 0.03886290267109871, "learning_rate": 0.01, "loss": 2.0516, "step": 17151 }, { "epoch": 1.7604679802955665, "grad_norm": 0.0885484591126442, "learning_rate": 0.01, "loss": 2.0333, "step": 17154 }, { "epoch": 1.7607758620689655, "grad_norm": 0.06733599305152893, "learning_rate": 0.01, "loss": 2.039, "step": 17157 }, { "epoch": 1.7610837438423645, "grad_norm": 0.10319662094116211, "learning_rate": 0.01, "loss": 2.0629, "step": 17160 }, { "epoch": 1.7613916256157636, "grad_norm": 0.047492869198322296, "learning_rate": 0.01, "loss": 2.0726, "step": 17163 }, { "epoch": 1.7616995073891626, "grad_norm": 0.04345547780394554, "learning_rate": 0.01, "loss": 2.0259, "step": 17166 }, { "epoch": 1.7620073891625616, "grad_norm": 0.0452197827398777, "learning_rate": 0.01, "loss": 2.0393, "step": 17169 }, { "epoch": 1.7623152709359606, "grad_norm": 0.0703844428062439, "learning_rate": 0.01, "loss": 2.0458, "step": 17172 }, { "epoch": 1.7626231527093597, "grad_norm": 0.07864879071712494, "learning_rate": 0.01, "loss": 2.047, "step": 17175 }, { "epoch": 1.7629310344827587, "grad_norm": 0.1282995641231537, "learning_rate": 0.01, "loss": 2.0226, "step": 17178 }, { "epoch": 1.7632389162561575, "grad_norm": 0.0837298184633255, "learning_rate": 0.01, "loss": 2.0483, "step": 17181 }, { "epoch": 1.7635467980295565, "grad_norm": 0.05081562697887421, "learning_rate": 0.01, "loss": 2.0656, "step": 17184 }, { "epoch": 1.7638546798029555, "grad_norm": 0.07952243834733963, "learning_rate": 0.01, "loss": 2.0561, "step": 17187 }, { "epoch": 1.7641625615763545, "grad_norm": 0.06592147797346115, "learning_rate": 0.01, "loss": 2.0311, "step": 17190 }, { "epoch": 1.7644704433497536, "grad_norm": 0.04341195523738861, "learning_rate": 0.01, "loss": 2.0413, "step": 17193 }, { "epoch": 1.7647783251231526, "grad_norm": 0.04649266228079796, "learning_rate": 0.01, "loss": 2.0338, "step": 17196 }, { "epoch": 1.7650862068965516, "grad_norm": 0.04569242149591446, "learning_rate": 0.01, "loss": 2.0428, "step": 17199 }, { "epoch": 1.7653940886699506, "grad_norm": 0.040291350334882736, "learning_rate": 0.01, "loss": 2.0165, "step": 17202 }, { "epoch": 1.7657019704433496, "grad_norm": 0.05328141525387764, "learning_rate": 0.01, "loss": 2.0384, "step": 17205 }, { "epoch": 1.7660098522167487, "grad_norm": 0.04405885562300682, "learning_rate": 0.01, "loss": 2.0566, "step": 17208 }, { "epoch": 1.7663177339901477, "grad_norm": 0.06635614484548569, "learning_rate": 0.01, "loss": 2.0397, "step": 17211 }, { "epoch": 1.7666256157635467, "grad_norm": 0.09231774508953094, "learning_rate": 0.01, "loss": 2.0247, "step": 17214 }, { "epoch": 1.7669334975369457, "grad_norm": 0.056320998817682266, "learning_rate": 0.01, "loss": 2.066, "step": 17217 }, { "epoch": 1.7672413793103448, "grad_norm": 0.049784105271101, "learning_rate": 0.01, "loss": 2.0175, "step": 17220 }, { "epoch": 1.7675492610837438, "grad_norm": 0.03728071227669716, "learning_rate": 0.01, "loss": 2.0403, "step": 17223 }, { "epoch": 1.7678571428571428, "grad_norm": 0.06607525050640106, "learning_rate": 0.01, "loss": 2.0364, "step": 17226 }, { "epoch": 1.7681650246305418, "grad_norm": 0.07367686927318573, "learning_rate": 0.01, "loss": 2.0478, "step": 17229 }, { "epoch": 1.7684729064039408, "grad_norm": 0.039499782025814056, "learning_rate": 0.01, "loss": 2.0508, "step": 17232 }, { "epoch": 1.7687807881773399, "grad_norm": 0.04863186180591583, "learning_rate": 0.01, "loss": 2.0595, "step": 17235 }, { "epoch": 1.7690886699507389, "grad_norm": 0.03877348452806473, "learning_rate": 0.01, "loss": 2.0608, "step": 17238 }, { "epoch": 1.769396551724138, "grad_norm": 0.049965135753154755, "learning_rate": 0.01, "loss": 2.0521, "step": 17241 }, { "epoch": 1.769704433497537, "grad_norm": 0.0697547048330307, "learning_rate": 0.01, "loss": 2.0191, "step": 17244 }, { "epoch": 1.770012315270936, "grad_norm": 0.0562531016767025, "learning_rate": 0.01, "loss": 2.0581, "step": 17247 }, { "epoch": 1.770320197044335, "grad_norm": 0.12931805849075317, "learning_rate": 0.01, "loss": 2.072, "step": 17250 }, { "epoch": 1.770628078817734, "grad_norm": 0.06590058654546738, "learning_rate": 0.01, "loss": 2.0487, "step": 17253 }, { "epoch": 1.770935960591133, "grad_norm": 0.045246463268995285, "learning_rate": 0.01, "loss": 2.0424, "step": 17256 }, { "epoch": 1.771243842364532, "grad_norm": 0.03972258046269417, "learning_rate": 0.01, "loss": 2.043, "step": 17259 }, { "epoch": 1.771551724137931, "grad_norm": 0.030682874843478203, "learning_rate": 0.01, "loss": 2.0665, "step": 17262 }, { "epoch": 1.77185960591133, "grad_norm": 0.08989464491605759, "learning_rate": 0.01, "loss": 2.042, "step": 17265 }, { "epoch": 1.772167487684729, "grad_norm": 0.05595966801047325, "learning_rate": 0.01, "loss": 2.0399, "step": 17268 }, { "epoch": 1.7724753694581281, "grad_norm": 0.16923703253269196, "learning_rate": 0.01, "loss": 2.0161, "step": 17271 }, { "epoch": 1.7727832512315271, "grad_norm": 0.08722022920846939, "learning_rate": 0.01, "loss": 2.0379, "step": 17274 }, { "epoch": 1.7730911330049262, "grad_norm": 0.0741046667098999, "learning_rate": 0.01, "loss": 2.0512, "step": 17277 }, { "epoch": 1.7733990147783252, "grad_norm": 0.06061973422765732, "learning_rate": 0.01, "loss": 2.0318, "step": 17280 }, { "epoch": 1.7737068965517242, "grad_norm": 0.036843594163656235, "learning_rate": 0.01, "loss": 2.056, "step": 17283 }, { "epoch": 1.7740147783251232, "grad_norm": 0.03937767818570137, "learning_rate": 0.01, "loss": 2.019, "step": 17286 }, { "epoch": 1.7743226600985222, "grad_norm": 0.03801162540912628, "learning_rate": 0.01, "loss": 2.04, "step": 17289 }, { "epoch": 1.7746305418719213, "grad_norm": 0.045572392642498016, "learning_rate": 0.01, "loss": 2.0665, "step": 17292 }, { "epoch": 1.7749384236453203, "grad_norm": 0.06430240720510483, "learning_rate": 0.01, "loss": 2.0357, "step": 17295 }, { "epoch": 1.7752463054187193, "grad_norm": 0.09266401827335358, "learning_rate": 0.01, "loss": 2.0474, "step": 17298 }, { "epoch": 1.7755541871921183, "grad_norm": 0.09686179459095001, "learning_rate": 0.01, "loss": 2.0224, "step": 17301 }, { "epoch": 1.7758620689655173, "grad_norm": 0.04640132188796997, "learning_rate": 0.01, "loss": 2.0611, "step": 17304 }, { "epoch": 1.7761699507389164, "grad_norm": 0.03891894221305847, "learning_rate": 0.01, "loss": 2.0529, "step": 17307 }, { "epoch": 1.7764778325123154, "grad_norm": 0.06023077294230461, "learning_rate": 0.01, "loss": 2.0282, "step": 17310 }, { "epoch": 1.7767857142857144, "grad_norm": 0.12215135246515274, "learning_rate": 0.01, "loss": 2.0472, "step": 17313 }, { "epoch": 1.7770935960591134, "grad_norm": 0.04197768121957779, "learning_rate": 0.01, "loss": 2.038, "step": 17316 }, { "epoch": 1.7774014778325125, "grad_norm": 0.0429445244371891, "learning_rate": 0.01, "loss": 2.0291, "step": 17319 }, { "epoch": 1.7777093596059115, "grad_norm": 0.04674970358610153, "learning_rate": 0.01, "loss": 2.0493, "step": 17322 }, { "epoch": 1.7780172413793105, "grad_norm": 0.11712675541639328, "learning_rate": 0.01, "loss": 2.0421, "step": 17325 }, { "epoch": 1.7783251231527095, "grad_norm": 0.04812907800078392, "learning_rate": 0.01, "loss": 2.0395, "step": 17328 }, { "epoch": 1.7786330049261085, "grad_norm": 0.04147825017571449, "learning_rate": 0.01, "loss": 2.0057, "step": 17331 }, { "epoch": 1.7789408866995073, "grad_norm": 0.07262876629829407, "learning_rate": 0.01, "loss": 2.0383, "step": 17334 }, { "epoch": 1.7792487684729064, "grad_norm": 0.08528011292219162, "learning_rate": 0.01, "loss": 2.0151, "step": 17337 }, { "epoch": 1.7795566502463054, "grad_norm": 0.046615902334451675, "learning_rate": 0.01, "loss": 2.0368, "step": 17340 }, { "epoch": 1.7798645320197044, "grad_norm": 0.06018273904919624, "learning_rate": 0.01, "loss": 2.0411, "step": 17343 }, { "epoch": 1.7801724137931034, "grad_norm": 0.07272887974977493, "learning_rate": 0.01, "loss": 2.026, "step": 17346 }, { "epoch": 1.7804802955665024, "grad_norm": 0.07152794301509857, "learning_rate": 0.01, "loss": 2.0631, "step": 17349 }, { "epoch": 1.7807881773399015, "grad_norm": 0.07950329035520554, "learning_rate": 0.01, "loss": 2.0435, "step": 17352 }, { "epoch": 1.7810960591133005, "grad_norm": 0.040778059512376785, "learning_rate": 0.01, "loss": 2.0089, "step": 17355 }, { "epoch": 1.7814039408866995, "grad_norm": 0.06180460751056671, "learning_rate": 0.01, "loss": 2.0183, "step": 17358 }, { "epoch": 1.7817118226600985, "grad_norm": 0.06950334459543228, "learning_rate": 0.01, "loss": 2.0352, "step": 17361 }, { "epoch": 1.7820197044334976, "grad_norm": 0.037724483758211136, "learning_rate": 0.01, "loss": 2.0324, "step": 17364 }, { "epoch": 1.7823275862068966, "grad_norm": 0.05991238355636597, "learning_rate": 0.01, "loss": 2.053, "step": 17367 }, { "epoch": 1.7826354679802956, "grad_norm": 0.047278665006160736, "learning_rate": 0.01, "loss": 2.0427, "step": 17370 }, { "epoch": 1.7829433497536946, "grad_norm": 0.05376293137669563, "learning_rate": 0.01, "loss": 2.0315, "step": 17373 }, { "epoch": 1.7832512315270936, "grad_norm": 0.04049403965473175, "learning_rate": 0.01, "loss": 2.0483, "step": 17376 }, { "epoch": 1.7835591133004927, "grad_norm": 0.04954640567302704, "learning_rate": 0.01, "loss": 2.0393, "step": 17379 }, { "epoch": 1.7838669950738915, "grad_norm": 0.049089133739471436, "learning_rate": 0.01, "loss": 2.0633, "step": 17382 }, { "epoch": 1.7841748768472905, "grad_norm": 0.0531185045838356, "learning_rate": 0.01, "loss": 2.0474, "step": 17385 }, { "epoch": 1.7844827586206895, "grad_norm": 0.060973040759563446, "learning_rate": 0.01, "loss": 2.0219, "step": 17388 }, { "epoch": 1.7847906403940885, "grad_norm": 0.044274650514125824, "learning_rate": 0.01, "loss": 2.0403, "step": 17391 }, { "epoch": 1.7850985221674875, "grad_norm": 0.08154580742120743, "learning_rate": 0.01, "loss": 2.011, "step": 17394 }, { "epoch": 1.7854064039408866, "grad_norm": 0.05253531411290169, "learning_rate": 0.01, "loss": 2.0352, "step": 17397 }, { "epoch": 1.7857142857142856, "grad_norm": 0.056620582938194275, "learning_rate": 0.01, "loss": 2.04, "step": 17400 }, { "epoch": 1.7860221674876846, "grad_norm": 0.069371297955513, "learning_rate": 0.01, "loss": 2.0456, "step": 17403 }, { "epoch": 1.7863300492610836, "grad_norm": 0.04726189747452736, "learning_rate": 0.01, "loss": 2.018, "step": 17406 }, { "epoch": 1.7866379310344827, "grad_norm": 0.11150949448347092, "learning_rate": 0.01, "loss": 2.0503, "step": 17409 }, { "epoch": 1.7869458128078817, "grad_norm": 0.07482532411813736, "learning_rate": 0.01, "loss": 2.0361, "step": 17412 }, { "epoch": 1.7872536945812807, "grad_norm": 0.03803645819425583, "learning_rate": 0.01, "loss": 2.0555, "step": 17415 }, { "epoch": 1.7875615763546797, "grad_norm": 0.08635829389095306, "learning_rate": 0.01, "loss": 2.0551, "step": 17418 }, { "epoch": 1.7878694581280787, "grad_norm": 0.08558929711580276, "learning_rate": 0.01, "loss": 2.0611, "step": 17421 }, { "epoch": 1.7881773399014778, "grad_norm": 0.051051054149866104, "learning_rate": 0.01, "loss": 2.0375, "step": 17424 }, { "epoch": 1.7884852216748768, "grad_norm": 0.0584864616394043, "learning_rate": 0.01, "loss": 2.0131, "step": 17427 }, { "epoch": 1.7887931034482758, "grad_norm": 0.04015490040183067, "learning_rate": 0.01, "loss": 2.0559, "step": 17430 }, { "epoch": 1.7891009852216748, "grad_norm": 0.0499749630689621, "learning_rate": 0.01, "loss": 2.0611, "step": 17433 }, { "epoch": 1.7894088669950738, "grad_norm": 0.08796360343694687, "learning_rate": 0.01, "loss": 2.0538, "step": 17436 }, { "epoch": 1.7897167487684729, "grad_norm": 0.08200754970312119, "learning_rate": 0.01, "loss": 2.0459, "step": 17439 }, { "epoch": 1.7900246305418719, "grad_norm": 0.09300393611192703, "learning_rate": 0.01, "loss": 2.051, "step": 17442 }, { "epoch": 1.790332512315271, "grad_norm": 0.08223576098680496, "learning_rate": 0.01, "loss": 2.0194, "step": 17445 }, { "epoch": 1.79064039408867, "grad_norm": 0.05235210806131363, "learning_rate": 0.01, "loss": 2.0421, "step": 17448 }, { "epoch": 1.790948275862069, "grad_norm": 0.047677502036094666, "learning_rate": 0.01, "loss": 2.043, "step": 17451 }, { "epoch": 1.791256157635468, "grad_norm": 0.044341955333948135, "learning_rate": 0.01, "loss": 2.0381, "step": 17454 }, { "epoch": 1.791564039408867, "grad_norm": 0.09555595368146896, "learning_rate": 0.01, "loss": 2.0224, "step": 17457 }, { "epoch": 1.791871921182266, "grad_norm": 0.05652477219700813, "learning_rate": 0.01, "loss": 2.0318, "step": 17460 }, { "epoch": 1.792179802955665, "grad_norm": 0.0979117676615715, "learning_rate": 0.01, "loss": 2.0747, "step": 17463 }, { "epoch": 1.792487684729064, "grad_norm": 0.0674947127699852, "learning_rate": 0.01, "loss": 2.0723, "step": 17466 }, { "epoch": 1.792795566502463, "grad_norm": 0.05617907643318176, "learning_rate": 0.01, "loss": 2.0444, "step": 17469 }, { "epoch": 1.793103448275862, "grad_norm": 0.10979234427213669, "learning_rate": 0.01, "loss": 2.0638, "step": 17472 }, { "epoch": 1.7934113300492611, "grad_norm": 0.056006476283073425, "learning_rate": 0.01, "loss": 2.0396, "step": 17475 }, { "epoch": 1.7937192118226601, "grad_norm": 0.10030517727136612, "learning_rate": 0.01, "loss": 2.0379, "step": 17478 }, { "epoch": 1.7940270935960592, "grad_norm": 0.042350657284259796, "learning_rate": 0.01, "loss": 2.0319, "step": 17481 }, { "epoch": 1.7943349753694582, "grad_norm": 0.03725098446011543, "learning_rate": 0.01, "loss": 2.0537, "step": 17484 }, { "epoch": 1.7946428571428572, "grad_norm": 0.09215757250785828, "learning_rate": 0.01, "loss": 2.0247, "step": 17487 }, { "epoch": 1.7949507389162562, "grad_norm": 0.08012344688177109, "learning_rate": 0.01, "loss": 2.0428, "step": 17490 }, { "epoch": 1.7952586206896552, "grad_norm": 0.128404900431633, "learning_rate": 0.01, "loss": 2.038, "step": 17493 }, { "epoch": 1.7955665024630543, "grad_norm": 0.08718766272068024, "learning_rate": 0.01, "loss": 2.0557, "step": 17496 }, { "epoch": 1.7958743842364533, "grad_norm": 0.030426733195781708, "learning_rate": 0.01, "loss": 2.0192, "step": 17499 }, { "epoch": 1.7961822660098523, "grad_norm": 0.03950949385762215, "learning_rate": 0.01, "loss": 2.0228, "step": 17502 }, { "epoch": 1.7964901477832513, "grad_norm": 0.049466658383607864, "learning_rate": 0.01, "loss": 2.0514, "step": 17505 }, { "epoch": 1.7967980295566504, "grad_norm": 0.06188172101974487, "learning_rate": 0.01, "loss": 2.0512, "step": 17508 }, { "epoch": 1.7971059113300494, "grad_norm": 0.06420351564884186, "learning_rate": 0.01, "loss": 2.0365, "step": 17511 }, { "epoch": 1.7974137931034484, "grad_norm": 0.04329871013760567, "learning_rate": 0.01, "loss": 2.0511, "step": 17514 }, { "epoch": 1.7977216748768474, "grad_norm": 0.04420280084013939, "learning_rate": 0.01, "loss": 2.0481, "step": 17517 }, { "epoch": 1.7980295566502464, "grad_norm": 0.04043954238295555, "learning_rate": 0.01, "loss": 2.0184, "step": 17520 }, { "epoch": 1.7983374384236455, "grad_norm": 0.049305226653814316, "learning_rate": 0.01, "loss": 2.0353, "step": 17523 }, { "epoch": 1.7986453201970445, "grad_norm": 0.1928088515996933, "learning_rate": 0.01, "loss": 2.0869, "step": 17526 }, { "epoch": 1.7989532019704435, "grad_norm": 0.12283357232809067, "learning_rate": 0.01, "loss": 2.0378, "step": 17529 }, { "epoch": 1.7992610837438425, "grad_norm": 0.07897382229566574, "learning_rate": 0.01, "loss": 2.045, "step": 17532 }, { "epoch": 1.7995689655172413, "grad_norm": 0.0749836266040802, "learning_rate": 0.01, "loss": 2.0388, "step": 17535 }, { "epoch": 1.7998768472906403, "grad_norm": 0.06578727811574936, "learning_rate": 0.01, "loss": 2.0575, "step": 17538 }, { "epoch": 1.8001847290640394, "grad_norm": 0.06609571725130081, "learning_rate": 0.01, "loss": 2.0448, "step": 17541 }, { "epoch": 1.8004926108374384, "grad_norm": 0.047696053981781006, "learning_rate": 0.01, "loss": 2.0574, "step": 17544 }, { "epoch": 1.8008004926108374, "grad_norm": 0.05110754072666168, "learning_rate": 0.01, "loss": 2.0191, "step": 17547 }, { "epoch": 1.8011083743842364, "grad_norm": 0.03783520683646202, "learning_rate": 0.01, "loss": 2.0328, "step": 17550 }, { "epoch": 1.8014162561576355, "grad_norm": 0.03145405650138855, "learning_rate": 0.01, "loss": 2.0373, "step": 17553 }, { "epoch": 1.8017241379310345, "grad_norm": 0.09492892026901245, "learning_rate": 0.01, "loss": 2.0173, "step": 17556 }, { "epoch": 1.8020320197044335, "grad_norm": 0.06920488178730011, "learning_rate": 0.01, "loss": 2.0809, "step": 17559 }, { "epoch": 1.8023399014778325, "grad_norm": 0.0583655945956707, "learning_rate": 0.01, "loss": 2.0259, "step": 17562 }, { "epoch": 1.8026477832512315, "grad_norm": 0.08449242264032364, "learning_rate": 0.01, "loss": 2.0205, "step": 17565 }, { "epoch": 1.8029556650246306, "grad_norm": 0.12186135351657867, "learning_rate": 0.01, "loss": 2.0076, "step": 17568 }, { "epoch": 1.8032635467980296, "grad_norm": 0.09926268458366394, "learning_rate": 0.01, "loss": 2.0444, "step": 17571 }, { "epoch": 1.8035714285714286, "grad_norm": 0.06820474565029144, "learning_rate": 0.01, "loss": 2.0211, "step": 17574 }, { "epoch": 1.8038793103448276, "grad_norm": 0.050847604870796204, "learning_rate": 0.01, "loss": 2.0377, "step": 17577 }, { "epoch": 1.8041871921182266, "grad_norm": 0.053053803741931915, "learning_rate": 0.01, "loss": 2.0462, "step": 17580 }, { "epoch": 1.8044950738916257, "grad_norm": 0.047114890068769455, "learning_rate": 0.01, "loss": 2.0171, "step": 17583 }, { "epoch": 1.8048029556650245, "grad_norm": 0.05182573199272156, "learning_rate": 0.01, "loss": 2.0396, "step": 17586 }, { "epoch": 1.8051108374384235, "grad_norm": 0.12609605491161346, "learning_rate": 0.01, "loss": 2.053, "step": 17589 }, { "epoch": 1.8054187192118225, "grad_norm": 0.0496569462120533, "learning_rate": 0.01, "loss": 2.0418, "step": 17592 }, { "epoch": 1.8057266009852215, "grad_norm": 0.0490572527050972, "learning_rate": 0.01, "loss": 2.0199, "step": 17595 }, { "epoch": 1.8060344827586206, "grad_norm": 0.038300756365060806, "learning_rate": 0.01, "loss": 2.0337, "step": 17598 }, { "epoch": 1.8063423645320196, "grad_norm": 0.03666609153151512, "learning_rate": 0.01, "loss": 2.0392, "step": 17601 }, { "epoch": 1.8066502463054186, "grad_norm": 0.036330632865428925, "learning_rate": 0.01, "loss": 2.0319, "step": 17604 }, { "epoch": 1.8069581280788176, "grad_norm": 0.0605342797935009, "learning_rate": 0.01, "loss": 2.0356, "step": 17607 }, { "epoch": 1.8072660098522166, "grad_norm": 0.04346880316734314, "learning_rate": 0.01, "loss": 2.0188, "step": 17610 }, { "epoch": 1.8075738916256157, "grad_norm": 0.06400660425424576, "learning_rate": 0.01, "loss": 2.0342, "step": 17613 }, { "epoch": 1.8078817733990147, "grad_norm": 0.0812198668718338, "learning_rate": 0.01, "loss": 2.0622, "step": 17616 }, { "epoch": 1.8081896551724137, "grad_norm": 0.06756972521543503, "learning_rate": 0.01, "loss": 2.0396, "step": 17619 }, { "epoch": 1.8084975369458127, "grad_norm": 0.05277147516608238, "learning_rate": 0.01, "loss": 2.0149, "step": 17622 }, { "epoch": 1.8088054187192117, "grad_norm": 0.07904385775327682, "learning_rate": 0.01, "loss": 2.0393, "step": 17625 }, { "epoch": 1.8091133004926108, "grad_norm": 0.06955704092979431, "learning_rate": 0.01, "loss": 2.0433, "step": 17628 }, { "epoch": 1.8094211822660098, "grad_norm": 0.06605497002601624, "learning_rate": 0.01, "loss": 2.0439, "step": 17631 }, { "epoch": 1.8097290640394088, "grad_norm": 0.03861093521118164, "learning_rate": 0.01, "loss": 2.03, "step": 17634 }, { "epoch": 1.8100369458128078, "grad_norm": 0.04323074221611023, "learning_rate": 0.01, "loss": 2.0444, "step": 17637 }, { "epoch": 1.8103448275862069, "grad_norm": 0.03443233296275139, "learning_rate": 0.01, "loss": 2.0466, "step": 17640 }, { "epoch": 1.8106527093596059, "grad_norm": 0.04190131649374962, "learning_rate": 0.01, "loss": 2.0307, "step": 17643 }, { "epoch": 1.810960591133005, "grad_norm": 0.09095717966556549, "learning_rate": 0.01, "loss": 2.0529, "step": 17646 }, { "epoch": 1.811268472906404, "grad_norm": 0.05452005937695503, "learning_rate": 0.01, "loss": 2.0337, "step": 17649 }, { "epoch": 1.811576354679803, "grad_norm": 0.05032350867986679, "learning_rate": 0.01, "loss": 2.0398, "step": 17652 }, { "epoch": 1.811884236453202, "grad_norm": 0.05733015760779381, "learning_rate": 0.01, "loss": 2.0573, "step": 17655 }, { "epoch": 1.812192118226601, "grad_norm": 0.09373817592859268, "learning_rate": 0.01, "loss": 2.0278, "step": 17658 }, { "epoch": 1.8125, "grad_norm": 0.07385890185832977, "learning_rate": 0.01, "loss": 2.032, "step": 17661 }, { "epoch": 1.812807881773399, "grad_norm": 0.08643963187932968, "learning_rate": 0.01, "loss": 2.0351, "step": 17664 }, { "epoch": 1.813115763546798, "grad_norm": 0.09909530729055405, "learning_rate": 0.01, "loss": 2.0562, "step": 17667 }, { "epoch": 1.813423645320197, "grad_norm": 0.04600978642702103, "learning_rate": 0.01, "loss": 2.0219, "step": 17670 }, { "epoch": 1.813731527093596, "grad_norm": 0.033060222864151, "learning_rate": 0.01, "loss": 2.0479, "step": 17673 }, { "epoch": 1.814039408866995, "grad_norm": 0.03789517655968666, "learning_rate": 0.01, "loss": 2.0242, "step": 17676 }, { "epoch": 1.8143472906403941, "grad_norm": 0.0502844899892807, "learning_rate": 0.01, "loss": 2.0519, "step": 17679 }, { "epoch": 1.8146551724137931, "grad_norm": 0.0627695843577385, "learning_rate": 0.01, "loss": 2.0327, "step": 17682 }, { "epoch": 1.8149630541871922, "grad_norm": 0.15737055242061615, "learning_rate": 0.01, "loss": 2.0572, "step": 17685 }, { "epoch": 1.8152709359605912, "grad_norm": 0.09944868832826614, "learning_rate": 0.01, "loss": 2.0462, "step": 17688 }, { "epoch": 1.8155788177339902, "grad_norm": 0.12345952540636063, "learning_rate": 0.01, "loss": 2.0447, "step": 17691 }, { "epoch": 1.8158866995073892, "grad_norm": 0.06330909579992294, "learning_rate": 0.01, "loss": 2.0511, "step": 17694 }, { "epoch": 1.8161945812807883, "grad_norm": 0.0584748238325119, "learning_rate": 0.01, "loss": 2.0164, "step": 17697 }, { "epoch": 1.8165024630541873, "grad_norm": 0.07284627109766006, "learning_rate": 0.01, "loss": 2.0308, "step": 17700 }, { "epoch": 1.8168103448275863, "grad_norm": 0.07302995771169662, "learning_rate": 0.01, "loss": 2.0347, "step": 17703 }, { "epoch": 1.8171182266009853, "grad_norm": 0.06292667984962463, "learning_rate": 0.01, "loss": 2.026, "step": 17706 }, { "epoch": 1.8174261083743843, "grad_norm": 0.04821958392858505, "learning_rate": 0.01, "loss": 2.033, "step": 17709 }, { "epoch": 1.8177339901477834, "grad_norm": 0.03572079911828041, "learning_rate": 0.01, "loss": 2.047, "step": 17712 }, { "epoch": 1.8180418719211824, "grad_norm": 0.12643416225910187, "learning_rate": 0.01, "loss": 2.0621, "step": 17715 }, { "epoch": 1.8183497536945814, "grad_norm": 0.08803770691156387, "learning_rate": 0.01, "loss": 2.0422, "step": 17718 }, { "epoch": 1.8186576354679804, "grad_norm": 0.061583418399095535, "learning_rate": 0.01, "loss": 1.9895, "step": 17721 }, { "epoch": 1.8189655172413794, "grad_norm": 0.04947415366768837, "learning_rate": 0.01, "loss": 2.0249, "step": 17724 }, { "epoch": 1.8192733990147785, "grad_norm": 0.06042906641960144, "learning_rate": 0.01, "loss": 2.0563, "step": 17727 }, { "epoch": 1.8195812807881775, "grad_norm": 0.03236406669020653, "learning_rate": 0.01, "loss": 2.0482, "step": 17730 }, { "epoch": 1.8198891625615765, "grad_norm": 0.05975859984755516, "learning_rate": 0.01, "loss": 2.0353, "step": 17733 }, { "epoch": 1.8201970443349755, "grad_norm": 0.11028258502483368, "learning_rate": 0.01, "loss": 2.0654, "step": 17736 }, { "epoch": 1.8205049261083743, "grad_norm": 0.055842846632003784, "learning_rate": 0.01, "loss": 2.0589, "step": 17739 }, { "epoch": 1.8208128078817734, "grad_norm": 0.09189102053642273, "learning_rate": 0.01, "loss": 2.0444, "step": 17742 }, { "epoch": 1.8211206896551724, "grad_norm": 0.07795927673578262, "learning_rate": 0.01, "loss": 2.0628, "step": 17745 }, { "epoch": 1.8214285714285714, "grad_norm": 0.06452701985836029, "learning_rate": 0.01, "loss": 2.0415, "step": 17748 }, { "epoch": 1.8217364532019704, "grad_norm": 0.056360337883234024, "learning_rate": 0.01, "loss": 2.0159, "step": 17751 }, { "epoch": 1.8220443349753694, "grad_norm": 0.08861987292766571, "learning_rate": 0.01, "loss": 2.0325, "step": 17754 }, { "epoch": 1.8223522167487685, "grad_norm": 0.07276416569948196, "learning_rate": 0.01, "loss": 2.022, "step": 17757 }, { "epoch": 1.8226600985221675, "grad_norm": 0.07501320540904999, "learning_rate": 0.01, "loss": 2.0592, "step": 17760 }, { "epoch": 1.8229679802955665, "grad_norm": 0.08408310264348984, "learning_rate": 0.01, "loss": 2.0353, "step": 17763 }, { "epoch": 1.8232758620689655, "grad_norm": 0.039008378982543945, "learning_rate": 0.01, "loss": 2.0541, "step": 17766 }, { "epoch": 1.8235837438423645, "grad_norm": 0.05153367295861244, "learning_rate": 0.01, "loss": 2.0614, "step": 17769 }, { "epoch": 1.8238916256157636, "grad_norm": 0.05685068294405937, "learning_rate": 0.01, "loss": 2.0603, "step": 17772 }, { "epoch": 1.8241995073891626, "grad_norm": 0.10836745798587799, "learning_rate": 0.01, "loss": 2.0293, "step": 17775 }, { "epoch": 1.8245073891625616, "grad_norm": 0.13855011761188507, "learning_rate": 0.01, "loss": 2.044, "step": 17778 }, { "epoch": 1.8248152709359606, "grad_norm": 0.07912803441286087, "learning_rate": 0.01, "loss": 2.062, "step": 17781 }, { "epoch": 1.8251231527093597, "grad_norm": 0.065729521214962, "learning_rate": 0.01, "loss": 2.0416, "step": 17784 }, { "epoch": 1.8254310344827587, "grad_norm": 0.04546307399868965, "learning_rate": 0.01, "loss": 2.0291, "step": 17787 }, { "epoch": 1.8257389162561575, "grad_norm": 0.03415641188621521, "learning_rate": 0.01, "loss": 2.0391, "step": 17790 }, { "epoch": 1.8260467980295565, "grad_norm": 0.038325123488903046, "learning_rate": 0.01, "loss": 2.0249, "step": 17793 }, { "epoch": 1.8263546798029555, "grad_norm": 0.057417213916778564, "learning_rate": 0.01, "loss": 2.0465, "step": 17796 }, { "epoch": 1.8266625615763545, "grad_norm": 0.07312962412834167, "learning_rate": 0.01, "loss": 2.009, "step": 17799 }, { "epoch": 1.8269704433497536, "grad_norm": 0.06465096771717072, "learning_rate": 0.01, "loss": 2.0582, "step": 17802 }, { "epoch": 1.8272783251231526, "grad_norm": 0.049065001308918, "learning_rate": 0.01, "loss": 2.0466, "step": 17805 }, { "epoch": 1.8275862068965516, "grad_norm": 0.05004505068063736, "learning_rate": 0.01, "loss": 2.0368, "step": 17808 }, { "epoch": 1.8278940886699506, "grad_norm": 0.12177273631095886, "learning_rate": 0.01, "loss": 2.0299, "step": 17811 }, { "epoch": 1.8282019704433496, "grad_norm": 0.09006219357252121, "learning_rate": 0.01, "loss": 2.0442, "step": 17814 }, { "epoch": 1.8285098522167487, "grad_norm": 0.07000398635864258, "learning_rate": 0.01, "loss": 2.0346, "step": 17817 }, { "epoch": 1.8288177339901477, "grad_norm": 0.03561507910490036, "learning_rate": 0.01, "loss": 2.022, "step": 17820 }, { "epoch": 1.8291256157635467, "grad_norm": 0.050965216010808945, "learning_rate": 0.01, "loss": 2.0577, "step": 17823 }, { "epoch": 1.8294334975369457, "grad_norm": 0.04437123239040375, "learning_rate": 0.01, "loss": 2.0204, "step": 17826 }, { "epoch": 1.8297413793103448, "grad_norm": 0.046157170087099075, "learning_rate": 0.01, "loss": 2.0315, "step": 17829 }, { "epoch": 1.8300492610837438, "grad_norm": 0.0641985610127449, "learning_rate": 0.01, "loss": 2.0619, "step": 17832 }, { "epoch": 1.8303571428571428, "grad_norm": 0.10295763611793518, "learning_rate": 0.01, "loss": 2.0142, "step": 17835 }, { "epoch": 1.8306650246305418, "grad_norm": 0.08395816385746002, "learning_rate": 0.01, "loss": 2.0388, "step": 17838 }, { "epoch": 1.8309729064039408, "grad_norm": 0.07087874412536621, "learning_rate": 0.01, "loss": 2.0458, "step": 17841 }, { "epoch": 1.8312807881773399, "grad_norm": 0.04754515364766121, "learning_rate": 0.01, "loss": 2.0305, "step": 17844 }, { "epoch": 1.8315886699507389, "grad_norm": 0.042998362332582474, "learning_rate": 0.01, "loss": 2.0334, "step": 17847 }, { "epoch": 1.831896551724138, "grad_norm": 0.044786881655454636, "learning_rate": 0.01, "loss": 2.0545, "step": 17850 }, { "epoch": 1.832204433497537, "grad_norm": 0.05035366117954254, "learning_rate": 0.01, "loss": 2.0346, "step": 17853 }, { "epoch": 1.832512315270936, "grad_norm": 0.08760454505681992, "learning_rate": 0.01, "loss": 2.0407, "step": 17856 }, { "epoch": 1.832820197044335, "grad_norm": 0.07182349264621735, "learning_rate": 0.01, "loss": 2.0617, "step": 17859 }, { "epoch": 1.833128078817734, "grad_norm": 0.0653420239686966, "learning_rate": 0.01, "loss": 2.02, "step": 17862 }, { "epoch": 1.833435960591133, "grad_norm": 0.07664595544338226, "learning_rate": 0.01, "loss": 2.0453, "step": 17865 }, { "epoch": 1.833743842364532, "grad_norm": 0.052884750068187714, "learning_rate": 0.01, "loss": 2.0433, "step": 17868 }, { "epoch": 1.834051724137931, "grad_norm": 0.049432456493377686, "learning_rate": 0.01, "loss": 2.0392, "step": 17871 }, { "epoch": 1.83435960591133, "grad_norm": 0.10208621621131897, "learning_rate": 0.01, "loss": 2.0425, "step": 17874 }, { "epoch": 1.834667487684729, "grad_norm": 0.0663546770811081, "learning_rate": 0.01, "loss": 2.0276, "step": 17877 }, { "epoch": 1.8349753694581281, "grad_norm": 0.0952199399471283, "learning_rate": 0.01, "loss": 2.0273, "step": 17880 }, { "epoch": 1.8352832512315271, "grad_norm": 0.04969238117337227, "learning_rate": 0.01, "loss": 2.0227, "step": 17883 }, { "epoch": 1.8355911330049262, "grad_norm": 0.05101123824715614, "learning_rate": 0.01, "loss": 2.0642, "step": 17886 }, { "epoch": 1.8358990147783252, "grad_norm": 0.1026005819439888, "learning_rate": 0.01, "loss": 2.0118, "step": 17889 }, { "epoch": 1.8362068965517242, "grad_norm": 0.06481184810400009, "learning_rate": 0.01, "loss": 2.0457, "step": 17892 }, { "epoch": 1.8365147783251232, "grad_norm": 0.0684402734041214, "learning_rate": 0.01, "loss": 2.0364, "step": 17895 }, { "epoch": 1.8368226600985222, "grad_norm": 0.1051085963845253, "learning_rate": 0.01, "loss": 2.0178, "step": 17898 }, { "epoch": 1.8371305418719213, "grad_norm": 0.06582857668399811, "learning_rate": 0.01, "loss": 2.0409, "step": 17901 }, { "epoch": 1.8374384236453203, "grad_norm": 0.05665391683578491, "learning_rate": 0.01, "loss": 2.04, "step": 17904 }, { "epoch": 1.8377463054187193, "grad_norm": 0.06239892914891243, "learning_rate": 0.01, "loss": 2.0199, "step": 17907 }, { "epoch": 1.8380541871921183, "grad_norm": 0.08531507849693298, "learning_rate": 0.01, "loss": 2.0429, "step": 17910 }, { "epoch": 1.8383620689655173, "grad_norm": 0.07379250973463058, "learning_rate": 0.01, "loss": 2.0226, "step": 17913 }, { "epoch": 1.8386699507389164, "grad_norm": 0.052789974957704544, "learning_rate": 0.01, "loss": 2.0198, "step": 17916 }, { "epoch": 1.8389778325123154, "grad_norm": 0.09525316208600998, "learning_rate": 0.01, "loss": 2.0423, "step": 17919 }, { "epoch": 1.8392857142857144, "grad_norm": 0.05700648948550224, "learning_rate": 0.01, "loss": 2.0332, "step": 17922 }, { "epoch": 1.8395935960591134, "grad_norm": 0.061519671231508255, "learning_rate": 0.01, "loss": 2.038, "step": 17925 }, { "epoch": 1.8399014778325125, "grad_norm": 0.05594256520271301, "learning_rate": 0.01, "loss": 2.0247, "step": 17928 }, { "epoch": 1.8402093596059115, "grad_norm": 0.06823567301034927, "learning_rate": 0.01, "loss": 2.0319, "step": 17931 }, { "epoch": 1.8405172413793105, "grad_norm": 0.061398666352033615, "learning_rate": 0.01, "loss": 2.038, "step": 17934 }, { "epoch": 1.8408251231527095, "grad_norm": 0.10590513050556183, "learning_rate": 0.01, "loss": 2.0336, "step": 17937 }, { "epoch": 1.8411330049261085, "grad_norm": 0.0579022578895092, "learning_rate": 0.01, "loss": 2.0249, "step": 17940 }, { "epoch": 1.8414408866995073, "grad_norm": 0.07047640532255173, "learning_rate": 0.01, "loss": 2.0147, "step": 17943 }, { "epoch": 1.8417487684729064, "grad_norm": 0.07486578077077866, "learning_rate": 0.01, "loss": 2.0413, "step": 17946 }, { "epoch": 1.8420566502463054, "grad_norm": 0.057884715497493744, "learning_rate": 0.01, "loss": 2.0357, "step": 17949 }, { "epoch": 1.8423645320197044, "grad_norm": 0.10381656140089035, "learning_rate": 0.01, "loss": 2.0382, "step": 17952 }, { "epoch": 1.8426724137931034, "grad_norm": 0.041863467544317245, "learning_rate": 0.01, "loss": 2.0345, "step": 17955 }, { "epoch": 1.8429802955665024, "grad_norm": 0.10012530535459518, "learning_rate": 0.01, "loss": 2.0648, "step": 17958 }, { "epoch": 1.8432881773399015, "grad_norm": 0.05597177520394325, "learning_rate": 0.01, "loss": 2.0513, "step": 17961 }, { "epoch": 1.8435960591133005, "grad_norm": 0.05338521674275398, "learning_rate": 0.01, "loss": 2.0287, "step": 17964 }, { "epoch": 1.8439039408866995, "grad_norm": 0.049141060560941696, "learning_rate": 0.01, "loss": 2.0486, "step": 17967 }, { "epoch": 1.8442118226600985, "grad_norm": 0.0784049779176712, "learning_rate": 0.01, "loss": 2.0176, "step": 17970 }, { "epoch": 1.8445197044334976, "grad_norm": 0.038596317172050476, "learning_rate": 0.01, "loss": 2.0167, "step": 17973 }, { "epoch": 1.8448275862068966, "grad_norm": 0.08521022647619247, "learning_rate": 0.01, "loss": 2.0364, "step": 17976 }, { "epoch": 1.8451354679802956, "grad_norm": 0.05890432372689247, "learning_rate": 0.01, "loss": 2.062, "step": 17979 }, { "epoch": 1.8454433497536946, "grad_norm": 0.09090931713581085, "learning_rate": 0.01, "loss": 2.0514, "step": 17982 }, { "epoch": 1.8457512315270936, "grad_norm": 0.06019595265388489, "learning_rate": 0.01, "loss": 2.0463, "step": 17985 }, { "epoch": 1.8460591133004927, "grad_norm": 0.07712443917989731, "learning_rate": 0.01, "loss": 2.0466, "step": 17988 }, { "epoch": 1.8463669950738915, "grad_norm": 0.06155428662896156, "learning_rate": 0.01, "loss": 2.0224, "step": 17991 }, { "epoch": 1.8466748768472905, "grad_norm": 0.07221681624650955, "learning_rate": 0.01, "loss": 2.0128, "step": 17994 }, { "epoch": 1.8469827586206895, "grad_norm": 0.056776583194732666, "learning_rate": 0.01, "loss": 2.0156, "step": 17997 }, { "epoch": 1.8472906403940885, "grad_norm": 0.12099254876375198, "learning_rate": 0.01, "loss": 2.0522, "step": 18000 }, { "epoch": 1.8475985221674875, "grad_norm": 0.060344647616147995, "learning_rate": 0.01, "loss": 2.038, "step": 18003 }, { "epoch": 1.8479064039408866, "grad_norm": 0.042333200573921204, "learning_rate": 0.01, "loss": 2.0202, "step": 18006 }, { "epoch": 1.8482142857142856, "grad_norm": 0.046059176325798035, "learning_rate": 0.01, "loss": 2.0505, "step": 18009 }, { "epoch": 1.8485221674876846, "grad_norm": 0.03853166475892067, "learning_rate": 0.01, "loss": 2.0681, "step": 18012 }, { "epoch": 1.8488300492610836, "grad_norm": 0.05197960138320923, "learning_rate": 0.01, "loss": 2.0563, "step": 18015 }, { "epoch": 1.8491379310344827, "grad_norm": 0.03316551446914673, "learning_rate": 0.01, "loss": 2.0279, "step": 18018 }, { "epoch": 1.8494458128078817, "grad_norm": 0.05977516993880272, "learning_rate": 0.01, "loss": 2.0217, "step": 18021 }, { "epoch": 1.8497536945812807, "grad_norm": 0.12331486493349075, "learning_rate": 0.01, "loss": 2.0531, "step": 18024 }, { "epoch": 1.8500615763546797, "grad_norm": 0.08065730333328247, "learning_rate": 0.01, "loss": 2.0508, "step": 18027 }, { "epoch": 1.8503694581280787, "grad_norm": 0.07649014890193939, "learning_rate": 0.01, "loss": 2.0629, "step": 18030 }, { "epoch": 1.8506773399014778, "grad_norm": 0.08431357145309448, "learning_rate": 0.01, "loss": 2.0504, "step": 18033 }, { "epoch": 1.8509852216748768, "grad_norm": 0.041856877505779266, "learning_rate": 0.01, "loss": 2.0376, "step": 18036 }, { "epoch": 1.8512931034482758, "grad_norm": 0.03598650172352791, "learning_rate": 0.01, "loss": 2.0455, "step": 18039 }, { "epoch": 1.8516009852216748, "grad_norm": 0.0360511913895607, "learning_rate": 0.01, "loss": 2.044, "step": 18042 }, { "epoch": 1.8519088669950738, "grad_norm": 0.0409335158765316, "learning_rate": 0.01, "loss": 2.0307, "step": 18045 }, { "epoch": 1.8522167487684729, "grad_norm": 0.04646136611700058, "learning_rate": 0.01, "loss": 2.0306, "step": 18048 }, { "epoch": 1.8525246305418719, "grad_norm": 0.08265028148889542, "learning_rate": 0.01, "loss": 2.0325, "step": 18051 }, { "epoch": 1.852832512315271, "grad_norm": 0.08118387311697006, "learning_rate": 0.01, "loss": 2.0315, "step": 18054 }, { "epoch": 1.85314039408867, "grad_norm": 0.05400428548455238, "learning_rate": 0.01, "loss": 2.0186, "step": 18057 }, { "epoch": 1.853448275862069, "grad_norm": 0.04605553671717644, "learning_rate": 0.01, "loss": 2.0212, "step": 18060 }, { "epoch": 1.853756157635468, "grad_norm": 0.06259449571371078, "learning_rate": 0.01, "loss": 2.0443, "step": 18063 }, { "epoch": 1.854064039408867, "grad_norm": 0.04901091381907463, "learning_rate": 0.01, "loss": 2.023, "step": 18066 }, { "epoch": 1.854371921182266, "grad_norm": 0.13033097982406616, "learning_rate": 0.01, "loss": 2.0696, "step": 18069 }, { "epoch": 1.854679802955665, "grad_norm": 0.04114639014005661, "learning_rate": 0.01, "loss": 2.0314, "step": 18072 }, { "epoch": 1.854987684729064, "grad_norm": 0.05269275978207588, "learning_rate": 0.01, "loss": 2.0576, "step": 18075 }, { "epoch": 1.855295566502463, "grad_norm": 0.052419982850551605, "learning_rate": 0.01, "loss": 2.0309, "step": 18078 }, { "epoch": 1.855603448275862, "grad_norm": 0.11109264940023422, "learning_rate": 0.01, "loss": 2.0234, "step": 18081 }, { "epoch": 1.8559113300492611, "grad_norm": 0.09544682502746582, "learning_rate": 0.01, "loss": 2.0287, "step": 18084 }, { "epoch": 1.8562192118226601, "grad_norm": 0.08282047510147095, "learning_rate": 0.01, "loss": 2.0601, "step": 18087 }, { "epoch": 1.8565270935960592, "grad_norm": 0.04255926236510277, "learning_rate": 0.01, "loss": 2.0094, "step": 18090 }, { "epoch": 1.8568349753694582, "grad_norm": 0.04899003729224205, "learning_rate": 0.01, "loss": 2.0494, "step": 18093 }, { "epoch": 1.8571428571428572, "grad_norm": 0.05081721395254135, "learning_rate": 0.01, "loss": 2.0309, "step": 18096 }, { "epoch": 1.8574507389162562, "grad_norm": 0.06633096188306808, "learning_rate": 0.01, "loss": 2.0526, "step": 18099 }, { "epoch": 1.8577586206896552, "grad_norm": 0.06513489037752151, "learning_rate": 0.01, "loss": 2.0072, "step": 18102 }, { "epoch": 1.8580665024630543, "grad_norm": 0.09429512917995453, "learning_rate": 0.01, "loss": 2.0522, "step": 18105 }, { "epoch": 1.8583743842364533, "grad_norm": 0.06042760610580444, "learning_rate": 0.01, "loss": 2.0613, "step": 18108 }, { "epoch": 1.8586822660098523, "grad_norm": 0.04098260775208473, "learning_rate": 0.01, "loss": 2.0712, "step": 18111 }, { "epoch": 1.8589901477832513, "grad_norm": 0.04882989823818207, "learning_rate": 0.01, "loss": 2.0308, "step": 18114 }, { "epoch": 1.8592980295566504, "grad_norm": 0.06196373701095581, "learning_rate": 0.01, "loss": 2.0535, "step": 18117 }, { "epoch": 1.8596059113300494, "grad_norm": 0.10515942424535751, "learning_rate": 0.01, "loss": 2.0402, "step": 18120 }, { "epoch": 1.8599137931034484, "grad_norm": 0.08962828665971756, "learning_rate": 0.01, "loss": 2.048, "step": 18123 }, { "epoch": 1.8602216748768474, "grad_norm": 0.07672600448131561, "learning_rate": 0.01, "loss": 2.0502, "step": 18126 }, { "epoch": 1.8605295566502464, "grad_norm": 0.04769902676343918, "learning_rate": 0.01, "loss": 2.0444, "step": 18129 }, { "epoch": 1.8608374384236455, "grad_norm": 0.06558270752429962, "learning_rate": 0.01, "loss": 2.0431, "step": 18132 }, { "epoch": 1.8611453201970445, "grad_norm": 0.06909210234880447, "learning_rate": 0.01, "loss": 2.0429, "step": 18135 }, { "epoch": 1.8614532019704435, "grad_norm": 0.07571686059236526, "learning_rate": 0.01, "loss": 2.045, "step": 18138 }, { "epoch": 1.8617610837438425, "grad_norm": 0.11170367896556854, "learning_rate": 0.01, "loss": 2.05, "step": 18141 }, { "epoch": 1.8620689655172413, "grad_norm": 0.12978370487689972, "learning_rate": 0.01, "loss": 2.0621, "step": 18144 }, { "epoch": 1.8623768472906403, "grad_norm": 0.056673552840948105, "learning_rate": 0.01, "loss": 2.0264, "step": 18147 }, { "epoch": 1.8626847290640394, "grad_norm": 0.04110763967037201, "learning_rate": 0.01, "loss": 2.0478, "step": 18150 }, { "epoch": 1.8629926108374384, "grad_norm": 0.06502550840377808, "learning_rate": 0.01, "loss": 2.0404, "step": 18153 }, { "epoch": 1.8633004926108374, "grad_norm": 0.059242501854896545, "learning_rate": 0.01, "loss": 2.0545, "step": 18156 }, { "epoch": 1.8636083743842364, "grad_norm": 0.05173099413514137, "learning_rate": 0.01, "loss": 2.0327, "step": 18159 }, { "epoch": 1.8639162561576355, "grad_norm": 0.0403546541929245, "learning_rate": 0.01, "loss": 2.037, "step": 18162 }, { "epoch": 1.8642241379310345, "grad_norm": 0.10920348763465881, "learning_rate": 0.01, "loss": 2.04, "step": 18165 }, { "epoch": 1.8645320197044335, "grad_norm": 0.05528813973069191, "learning_rate": 0.01, "loss": 2.0207, "step": 18168 }, { "epoch": 1.8648399014778325, "grad_norm": 0.04583175480365753, "learning_rate": 0.01, "loss": 2.0409, "step": 18171 }, { "epoch": 1.8651477832512315, "grad_norm": 0.04503155127167702, "learning_rate": 0.01, "loss": 2.0412, "step": 18174 }, { "epoch": 1.8654556650246306, "grad_norm": 0.038750261068344116, "learning_rate": 0.01, "loss": 2.0412, "step": 18177 }, { "epoch": 1.8657635467980296, "grad_norm": 0.0650535374879837, "learning_rate": 0.01, "loss": 2.0515, "step": 18180 }, { "epoch": 1.8660714285714286, "grad_norm": 0.08589319884777069, "learning_rate": 0.01, "loss": 2.0456, "step": 18183 }, { "epoch": 1.8663793103448276, "grad_norm": 0.06245085969567299, "learning_rate": 0.01, "loss": 2.0257, "step": 18186 }, { "epoch": 1.8666871921182266, "grad_norm": 0.07419238984584808, "learning_rate": 0.01, "loss": 2.0501, "step": 18189 }, { "epoch": 1.8669950738916257, "grad_norm": 0.06336040049791336, "learning_rate": 0.01, "loss": 2.0822, "step": 18192 }, { "epoch": 1.8673029556650245, "grad_norm": 0.09494315087795258, "learning_rate": 0.01, "loss": 2.0523, "step": 18195 }, { "epoch": 1.8676108374384235, "grad_norm": 0.06543273478746414, "learning_rate": 0.01, "loss": 2.0289, "step": 18198 }, { "epoch": 1.8679187192118225, "grad_norm": 0.05602452531456947, "learning_rate": 0.01, "loss": 2.0626, "step": 18201 }, { "epoch": 1.8682266009852215, "grad_norm": 0.041735779494047165, "learning_rate": 0.01, "loss": 2.0137, "step": 18204 }, { "epoch": 1.8685344827586206, "grad_norm": 0.03998032957315445, "learning_rate": 0.01, "loss": 2.0341, "step": 18207 }, { "epoch": 1.8688423645320196, "grad_norm": 0.07010776549577713, "learning_rate": 0.01, "loss": 2.0525, "step": 18210 }, { "epoch": 1.8691502463054186, "grad_norm": 0.08516181260347366, "learning_rate": 0.01, "loss": 2.0065, "step": 18213 }, { "epoch": 1.8694581280788176, "grad_norm": 0.08233955502510071, "learning_rate": 0.01, "loss": 2.0391, "step": 18216 }, { "epoch": 1.8697660098522166, "grad_norm": 0.06725854426622391, "learning_rate": 0.01, "loss": 2.0224, "step": 18219 }, { "epoch": 1.8700738916256157, "grad_norm": 0.03345496207475662, "learning_rate": 0.01, "loss": 2.0327, "step": 18222 }, { "epoch": 1.8703817733990147, "grad_norm": 0.09758662432432175, "learning_rate": 0.01, "loss": 2.0258, "step": 18225 }, { "epoch": 1.8706896551724137, "grad_norm": 0.05376002565026283, "learning_rate": 0.01, "loss": 2.0592, "step": 18228 }, { "epoch": 1.8709975369458127, "grad_norm": 0.04620193690061569, "learning_rate": 0.01, "loss": 2.023, "step": 18231 }, { "epoch": 1.8713054187192117, "grad_norm": 0.0853218212723732, "learning_rate": 0.01, "loss": 2.0352, "step": 18234 }, { "epoch": 1.8716133004926108, "grad_norm": 0.15689584612846375, "learning_rate": 0.01, "loss": 2.0163, "step": 18237 }, { "epoch": 1.8719211822660098, "grad_norm": 0.05037194490432739, "learning_rate": 0.01, "loss": 2.0541, "step": 18240 }, { "epoch": 1.8722290640394088, "grad_norm": 0.031591251492500305, "learning_rate": 0.01, "loss": 2.0502, "step": 18243 }, { "epoch": 1.8725369458128078, "grad_norm": 0.05832947790622711, "learning_rate": 0.01, "loss": 2.0284, "step": 18246 }, { "epoch": 1.8728448275862069, "grad_norm": 0.059619709849357605, "learning_rate": 0.01, "loss": 2.053, "step": 18249 }, { "epoch": 1.8731527093596059, "grad_norm": 0.04232211783528328, "learning_rate": 0.01, "loss": 2.0456, "step": 18252 }, { "epoch": 1.873460591133005, "grad_norm": 0.03756287693977356, "learning_rate": 0.01, "loss": 2.0183, "step": 18255 }, { "epoch": 1.873768472906404, "grad_norm": 0.04177022725343704, "learning_rate": 0.01, "loss": 2.0213, "step": 18258 }, { "epoch": 1.874076354679803, "grad_norm": 0.054342493414878845, "learning_rate": 0.01, "loss": 2.0246, "step": 18261 }, { "epoch": 1.874384236453202, "grad_norm": 0.11872408539056778, "learning_rate": 0.01, "loss": 2.0259, "step": 18264 }, { "epoch": 1.874692118226601, "grad_norm": 0.05143645405769348, "learning_rate": 0.01, "loss": 2.0473, "step": 18267 }, { "epoch": 1.875, "grad_norm": 0.06726546585559845, "learning_rate": 0.01, "loss": 2.0413, "step": 18270 }, { "epoch": 1.875307881773399, "grad_norm": 0.10031245648860931, "learning_rate": 0.01, "loss": 2.0315, "step": 18273 }, { "epoch": 1.875615763546798, "grad_norm": 0.09145006537437439, "learning_rate": 0.01, "loss": 2.0359, "step": 18276 }, { "epoch": 1.875923645320197, "grad_norm": 0.0797610953450203, "learning_rate": 0.01, "loss": 2.0507, "step": 18279 }, { "epoch": 1.876231527093596, "grad_norm": 0.07170062512159348, "learning_rate": 0.01, "loss": 2.0369, "step": 18282 }, { "epoch": 1.876539408866995, "grad_norm": 0.043757934123277664, "learning_rate": 0.01, "loss": 2.0825, "step": 18285 }, { "epoch": 1.8768472906403941, "grad_norm": 0.059610821306705475, "learning_rate": 0.01, "loss": 2.0275, "step": 18288 }, { "epoch": 1.8771551724137931, "grad_norm": 0.06014898791909218, "learning_rate": 0.01, "loss": 2.0498, "step": 18291 }, { "epoch": 1.8774630541871922, "grad_norm": 0.0823112353682518, "learning_rate": 0.01, "loss": 1.9977, "step": 18294 }, { "epoch": 1.8777709359605912, "grad_norm": 0.11342828720808029, "learning_rate": 0.01, "loss": 2.0259, "step": 18297 }, { "epoch": 1.8780788177339902, "grad_norm": 0.1533091962337494, "learning_rate": 0.01, "loss": 2.0593, "step": 18300 }, { "epoch": 1.8783866995073892, "grad_norm": 0.09665971249341965, "learning_rate": 0.01, "loss": 2.0304, "step": 18303 }, { "epoch": 1.8786945812807883, "grad_norm": 0.043992988765239716, "learning_rate": 0.01, "loss": 2.0412, "step": 18306 }, { "epoch": 1.8790024630541873, "grad_norm": 0.03452041372656822, "learning_rate": 0.01, "loss": 2.009, "step": 18309 }, { "epoch": 1.8793103448275863, "grad_norm": 0.05596618726849556, "learning_rate": 0.01, "loss": 2.0498, "step": 18312 }, { "epoch": 1.8796182266009853, "grad_norm": 0.0542016327381134, "learning_rate": 0.01, "loss": 2.033, "step": 18315 }, { "epoch": 1.8799261083743843, "grad_norm": 0.049744654446840286, "learning_rate": 0.01, "loss": 2.0625, "step": 18318 }, { "epoch": 1.8802339901477834, "grad_norm": 0.04219472035765648, "learning_rate": 0.01, "loss": 2.0458, "step": 18321 }, { "epoch": 1.8805418719211824, "grad_norm": 0.11828272044658661, "learning_rate": 0.01, "loss": 2.047, "step": 18324 }, { "epoch": 1.8808497536945814, "grad_norm": 0.042180564254522324, "learning_rate": 0.01, "loss": 2.0477, "step": 18327 }, { "epoch": 1.8811576354679804, "grad_norm": 0.05486786365509033, "learning_rate": 0.01, "loss": 2.034, "step": 18330 }, { "epoch": 1.8814655172413794, "grad_norm": 0.09456659108400345, "learning_rate": 0.01, "loss": 2.059, "step": 18333 }, { "epoch": 1.8817733990147785, "grad_norm": 0.03962776064872742, "learning_rate": 0.01, "loss": 2.0569, "step": 18336 }, { "epoch": 1.8820812807881775, "grad_norm": 0.06588723510503769, "learning_rate": 0.01, "loss": 2.0131, "step": 18339 }, { "epoch": 1.8823891625615765, "grad_norm": 0.0490611270070076, "learning_rate": 0.01, "loss": 2.0558, "step": 18342 }, { "epoch": 1.8826970443349755, "grad_norm": 0.10546906292438507, "learning_rate": 0.01, "loss": 2.0354, "step": 18345 }, { "epoch": 1.8830049261083743, "grad_norm": 0.05751054733991623, "learning_rate": 0.01, "loss": 2.0321, "step": 18348 }, { "epoch": 1.8833128078817734, "grad_norm": 0.102676160633564, "learning_rate": 0.01, "loss": 2.0116, "step": 18351 }, { "epoch": 1.8836206896551724, "grad_norm": 0.0536825954914093, "learning_rate": 0.01, "loss": 2.0677, "step": 18354 }, { "epoch": 1.8839285714285714, "grad_norm": 0.039357978850603104, "learning_rate": 0.01, "loss": 2.0217, "step": 18357 }, { "epoch": 1.8842364532019704, "grad_norm": 0.03515158221125603, "learning_rate": 0.01, "loss": 2.0332, "step": 18360 }, { "epoch": 1.8845443349753694, "grad_norm": 0.04234091565012932, "learning_rate": 0.01, "loss": 2.0329, "step": 18363 }, { "epoch": 1.8848522167487685, "grad_norm": 0.06893119215965271, "learning_rate": 0.01, "loss": 2.0241, "step": 18366 }, { "epoch": 1.8851600985221675, "grad_norm": 0.08353175222873688, "learning_rate": 0.01, "loss": 2.0236, "step": 18369 }, { "epoch": 1.8854679802955665, "grad_norm": 0.0567467026412487, "learning_rate": 0.01, "loss": 2.0672, "step": 18372 }, { "epoch": 1.8857758620689655, "grad_norm": 0.05529101565480232, "learning_rate": 0.01, "loss": 2.0364, "step": 18375 }, { "epoch": 1.8860837438423645, "grad_norm": 0.09581262618303299, "learning_rate": 0.01, "loss": 2.0395, "step": 18378 }, { "epoch": 1.8863916256157636, "grad_norm": 0.052034202963113785, "learning_rate": 0.01, "loss": 2.0415, "step": 18381 }, { "epoch": 1.8866995073891626, "grad_norm": 0.10314558446407318, "learning_rate": 0.01, "loss": 2.0095, "step": 18384 }, { "epoch": 1.8870073891625616, "grad_norm": 0.04631441831588745, "learning_rate": 0.01, "loss": 2.0504, "step": 18387 }, { "epoch": 1.8873152709359606, "grad_norm": 0.08067111670970917, "learning_rate": 0.01, "loss": 2.0504, "step": 18390 }, { "epoch": 1.8876231527093597, "grad_norm": 0.07892802357673645, "learning_rate": 0.01, "loss": 2.0495, "step": 18393 }, { "epoch": 1.8879310344827587, "grad_norm": 0.06167163327336311, "learning_rate": 0.01, "loss": 2.0546, "step": 18396 }, { "epoch": 1.8882389162561575, "grad_norm": 0.06746269762516022, "learning_rate": 0.01, "loss": 2.0187, "step": 18399 }, { "epoch": 1.8885467980295565, "grad_norm": 0.06389199942350388, "learning_rate": 0.01, "loss": 2.0556, "step": 18402 }, { "epoch": 1.8888546798029555, "grad_norm": 0.05448369309306145, "learning_rate": 0.01, "loss": 2.0607, "step": 18405 }, { "epoch": 1.8891625615763545, "grad_norm": 0.05535599961876869, "learning_rate": 0.01, "loss": 2.0347, "step": 18408 }, { "epoch": 1.8894704433497536, "grad_norm": 0.0533415786921978, "learning_rate": 0.01, "loss": 2.0648, "step": 18411 }, { "epoch": 1.8897783251231526, "grad_norm": 0.06423043459653854, "learning_rate": 0.01, "loss": 2.0554, "step": 18414 }, { "epoch": 1.8900862068965516, "grad_norm": 0.04682399705052376, "learning_rate": 0.01, "loss": 2.0402, "step": 18417 }, { "epoch": 1.8903940886699506, "grad_norm": 0.09395507723093033, "learning_rate": 0.01, "loss": 2.0417, "step": 18420 }, { "epoch": 1.8907019704433496, "grad_norm": 0.07777848839759827, "learning_rate": 0.01, "loss": 2.068, "step": 18423 }, { "epoch": 1.8910098522167487, "grad_norm": 0.052006904035806656, "learning_rate": 0.01, "loss": 2.0232, "step": 18426 }, { "epoch": 1.8913177339901477, "grad_norm": 0.05325109511613846, "learning_rate": 0.01, "loss": 2.0292, "step": 18429 }, { "epoch": 1.8916256157635467, "grad_norm": 0.05496850982308388, "learning_rate": 0.01, "loss": 2.0531, "step": 18432 }, { "epoch": 1.8919334975369457, "grad_norm": 0.0395922027528286, "learning_rate": 0.01, "loss": 2.0504, "step": 18435 }, { "epoch": 1.8922413793103448, "grad_norm": 0.0664554312825203, "learning_rate": 0.01, "loss": 2.0555, "step": 18438 }, { "epoch": 1.8925492610837438, "grad_norm": 0.06475098431110382, "learning_rate": 0.01, "loss": 2.0361, "step": 18441 }, { "epoch": 1.8928571428571428, "grad_norm": 0.06291298568248749, "learning_rate": 0.01, "loss": 2.0083, "step": 18444 }, { "epoch": 1.8931650246305418, "grad_norm": 0.07891640067100525, "learning_rate": 0.01, "loss": 2.0338, "step": 18447 }, { "epoch": 1.8934729064039408, "grad_norm": 0.10065143555402756, "learning_rate": 0.01, "loss": 2.0372, "step": 18450 }, { "epoch": 1.8937807881773399, "grad_norm": 0.0962565690279007, "learning_rate": 0.01, "loss": 2.0369, "step": 18453 }, { "epoch": 1.8940886699507389, "grad_norm": 0.06286763399839401, "learning_rate": 0.01, "loss": 2.0416, "step": 18456 }, { "epoch": 1.894396551724138, "grad_norm": 0.05067887529730797, "learning_rate": 0.01, "loss": 2.0582, "step": 18459 }, { "epoch": 1.894704433497537, "grad_norm": 0.03956342115998268, "learning_rate": 0.01, "loss": 2.0633, "step": 18462 }, { "epoch": 1.895012315270936, "grad_norm": 0.06670361012220383, "learning_rate": 0.01, "loss": 2.0569, "step": 18465 }, { "epoch": 1.895320197044335, "grad_norm": 0.07848145067691803, "learning_rate": 0.01, "loss": 2.037, "step": 18468 }, { "epoch": 1.895628078817734, "grad_norm": 0.05415938422083855, "learning_rate": 0.01, "loss": 2.0699, "step": 18471 }, { "epoch": 1.895935960591133, "grad_norm": 0.07549092918634415, "learning_rate": 0.01, "loss": 2.0369, "step": 18474 }, { "epoch": 1.896243842364532, "grad_norm": 0.057871196419000626, "learning_rate": 0.01, "loss": 2.0434, "step": 18477 }, { "epoch": 1.896551724137931, "grad_norm": 0.059748612344264984, "learning_rate": 0.01, "loss": 2.0374, "step": 18480 }, { "epoch": 1.89685960591133, "grad_norm": 0.04501016065478325, "learning_rate": 0.01, "loss": 2.034, "step": 18483 }, { "epoch": 1.897167487684729, "grad_norm": 0.06361118704080582, "learning_rate": 0.01, "loss": 2.0398, "step": 18486 }, { "epoch": 1.8974753694581281, "grad_norm": 0.09649393707513809, "learning_rate": 0.01, "loss": 2.0311, "step": 18489 }, { "epoch": 1.8977832512315271, "grad_norm": 0.08288730680942535, "learning_rate": 0.01, "loss": 2.0585, "step": 18492 }, { "epoch": 1.8980911330049262, "grad_norm": 0.037788692861795425, "learning_rate": 0.01, "loss": 2.028, "step": 18495 }, { "epoch": 1.8983990147783252, "grad_norm": 0.05678097531199455, "learning_rate": 0.01, "loss": 2.029, "step": 18498 }, { "epoch": 1.8987068965517242, "grad_norm": 0.05753886699676514, "learning_rate": 0.01, "loss": 2.0523, "step": 18501 }, { "epoch": 1.8990147783251232, "grad_norm": 0.0542941652238369, "learning_rate": 0.01, "loss": 2.0334, "step": 18504 }, { "epoch": 1.8993226600985222, "grad_norm": 0.06856728345155716, "learning_rate": 0.01, "loss": 2.0179, "step": 18507 }, { "epoch": 1.8996305418719213, "grad_norm": 0.09270088374614716, "learning_rate": 0.01, "loss": 2.0258, "step": 18510 }, { "epoch": 1.8999384236453203, "grad_norm": 0.04473109543323517, "learning_rate": 0.01, "loss": 2.0036, "step": 18513 }, { "epoch": 1.9002463054187193, "grad_norm": 0.06040007993578911, "learning_rate": 0.01, "loss": 2.0344, "step": 18516 }, { "epoch": 1.9005541871921183, "grad_norm": 0.032143257558345795, "learning_rate": 0.01, "loss": 2.0478, "step": 18519 }, { "epoch": 1.9008620689655173, "grad_norm": 0.04205821454524994, "learning_rate": 0.01, "loss": 2.0562, "step": 18522 }, { "epoch": 1.9011699507389164, "grad_norm": 0.03920583799481392, "learning_rate": 0.01, "loss": 2.0561, "step": 18525 }, { "epoch": 1.9014778325123154, "grad_norm": 0.17323125898838043, "learning_rate": 0.01, "loss": 2.0243, "step": 18528 }, { "epoch": 1.9017857142857144, "grad_norm": 0.04595707729458809, "learning_rate": 0.01, "loss": 2.0515, "step": 18531 }, { "epoch": 1.9020935960591134, "grad_norm": 0.03803316131234169, "learning_rate": 0.01, "loss": 2.0268, "step": 18534 }, { "epoch": 1.9024014778325125, "grad_norm": 0.04623658210039139, "learning_rate": 0.01, "loss": 2.0423, "step": 18537 }, { "epoch": 1.9027093596059115, "grad_norm": 0.04388248175382614, "learning_rate": 0.01, "loss": 2.0207, "step": 18540 }, { "epoch": 1.9030172413793105, "grad_norm": 0.03582540154457092, "learning_rate": 0.01, "loss": 2.0307, "step": 18543 }, { "epoch": 1.9033251231527095, "grad_norm": 0.033453166484832764, "learning_rate": 0.01, "loss": 2.0514, "step": 18546 }, { "epoch": 1.9036330049261085, "grad_norm": 0.04929531365633011, "learning_rate": 0.01, "loss": 2.022, "step": 18549 }, { "epoch": 1.9039408866995073, "grad_norm": 0.0575069934129715, "learning_rate": 0.01, "loss": 2.0136, "step": 18552 }, { "epoch": 1.9042487684729064, "grad_norm": 0.044883664697408676, "learning_rate": 0.01, "loss": 2.0267, "step": 18555 }, { "epoch": 1.9045566502463054, "grad_norm": 0.06335309147834778, "learning_rate": 0.01, "loss": 2.013, "step": 18558 }, { "epoch": 1.9048645320197044, "grad_norm": 0.07315582036972046, "learning_rate": 0.01, "loss": 2.0331, "step": 18561 }, { "epoch": 1.9051724137931034, "grad_norm": 0.08378446102142334, "learning_rate": 0.01, "loss": 2.0154, "step": 18564 }, { "epoch": 1.9054802955665024, "grad_norm": 0.09492503106594086, "learning_rate": 0.01, "loss": 2.0239, "step": 18567 }, { "epoch": 1.9057881773399015, "grad_norm": 0.0497819185256958, "learning_rate": 0.01, "loss": 2.0551, "step": 18570 }, { "epoch": 1.9060960591133005, "grad_norm": 0.06625241041183472, "learning_rate": 0.01, "loss": 2.0477, "step": 18573 }, { "epoch": 1.9064039408866995, "grad_norm": 0.13533645868301392, "learning_rate": 0.01, "loss": 2.0288, "step": 18576 }, { "epoch": 1.9067118226600985, "grad_norm": 0.129546657204628, "learning_rate": 0.01, "loss": 1.9943, "step": 18579 }, { "epoch": 1.9070197044334976, "grad_norm": 0.0862266942858696, "learning_rate": 0.01, "loss": 2.0273, "step": 18582 }, { "epoch": 1.9073275862068966, "grad_norm": 0.04262632504105568, "learning_rate": 0.01, "loss": 2.0289, "step": 18585 }, { "epoch": 1.9076354679802956, "grad_norm": 0.06536297500133514, "learning_rate": 0.01, "loss": 2.0453, "step": 18588 }, { "epoch": 1.9079433497536946, "grad_norm": 0.04408801719546318, "learning_rate": 0.01, "loss": 2.045, "step": 18591 }, { "epoch": 1.9082512315270936, "grad_norm": 0.0382089763879776, "learning_rate": 0.01, "loss": 2.057, "step": 18594 }, { "epoch": 1.9085591133004927, "grad_norm": 0.05695042014122009, "learning_rate": 0.01, "loss": 2.0468, "step": 18597 }, { "epoch": 1.9088669950738915, "grad_norm": 0.06890982389450073, "learning_rate": 0.01, "loss": 2.0631, "step": 18600 }, { "epoch": 1.9091748768472905, "grad_norm": 0.06517864018678665, "learning_rate": 0.01, "loss": 2.0288, "step": 18603 }, { "epoch": 1.9094827586206895, "grad_norm": 0.03709007799625397, "learning_rate": 0.01, "loss": 2.0579, "step": 18606 }, { "epoch": 1.9097906403940885, "grad_norm": 0.040355831384658813, "learning_rate": 0.01, "loss": 2.023, "step": 18609 }, { "epoch": 1.9100985221674875, "grad_norm": 0.08973202854394913, "learning_rate": 0.01, "loss": 2.0495, "step": 18612 }, { "epoch": 1.9104064039408866, "grad_norm": 0.08074682205915451, "learning_rate": 0.01, "loss": 2.0352, "step": 18615 }, { "epoch": 1.9107142857142856, "grad_norm": 0.07134959101676941, "learning_rate": 0.01, "loss": 2.0234, "step": 18618 }, { "epoch": 1.9110221674876846, "grad_norm": 0.10389960557222366, "learning_rate": 0.01, "loss": 2.0456, "step": 18621 }, { "epoch": 1.9113300492610836, "grad_norm": 0.052665699273347855, "learning_rate": 0.01, "loss": 2.019, "step": 18624 }, { "epoch": 1.9116379310344827, "grad_norm": 0.06355523318052292, "learning_rate": 0.01, "loss": 2.0328, "step": 18627 }, { "epoch": 1.9119458128078817, "grad_norm": 0.06806465983390808, "learning_rate": 0.01, "loss": 2.0364, "step": 18630 }, { "epoch": 1.9122536945812807, "grad_norm": 0.08892465382814407, "learning_rate": 0.01, "loss": 2.0436, "step": 18633 }, { "epoch": 1.9125615763546797, "grad_norm": 0.09806855767965317, "learning_rate": 0.01, "loss": 2.0494, "step": 18636 }, { "epoch": 1.9128694581280787, "grad_norm": 0.036283593624830246, "learning_rate": 0.01, "loss": 2.03, "step": 18639 }, { "epoch": 1.9131773399014778, "grad_norm": 0.06654248386621475, "learning_rate": 0.01, "loss": 2.0177, "step": 18642 }, { "epoch": 1.9134852216748768, "grad_norm": 0.07729227095842361, "learning_rate": 0.01, "loss": 2.041, "step": 18645 }, { "epoch": 1.9137931034482758, "grad_norm": 0.05296695604920387, "learning_rate": 0.01, "loss": 2.0512, "step": 18648 }, { "epoch": 1.9141009852216748, "grad_norm": 0.05579183250665665, "learning_rate": 0.01, "loss": 2.0171, "step": 18651 }, { "epoch": 1.9144088669950738, "grad_norm": 0.04230615124106407, "learning_rate": 0.01, "loss": 2.0178, "step": 18654 }, { "epoch": 1.9147167487684729, "grad_norm": 0.0412709042429924, "learning_rate": 0.01, "loss": 2.0356, "step": 18657 }, { "epoch": 1.9150246305418719, "grad_norm": 0.056640543043613434, "learning_rate": 0.01, "loss": 2.0506, "step": 18660 }, { "epoch": 1.915332512315271, "grad_norm": 0.04353609308600426, "learning_rate": 0.01, "loss": 2.0703, "step": 18663 }, { "epoch": 1.91564039408867, "grad_norm": 0.04212663322687149, "learning_rate": 0.01, "loss": 2.0333, "step": 18666 }, { "epoch": 1.915948275862069, "grad_norm": 0.07639022916555405, "learning_rate": 0.01, "loss": 2.0267, "step": 18669 }, { "epoch": 1.916256157635468, "grad_norm": 0.09405479580163956, "learning_rate": 0.01, "loss": 2.0532, "step": 18672 }, { "epoch": 1.916564039408867, "grad_norm": 0.07501058280467987, "learning_rate": 0.01, "loss": 2.0281, "step": 18675 }, { "epoch": 1.916871921182266, "grad_norm": 0.06343735009431839, "learning_rate": 0.01, "loss": 2.0387, "step": 18678 }, { "epoch": 1.917179802955665, "grad_norm": 0.07794613391160965, "learning_rate": 0.01, "loss": 2.0189, "step": 18681 }, { "epoch": 1.917487684729064, "grad_norm": 0.1063399538397789, "learning_rate": 0.01, "loss": 2.0522, "step": 18684 }, { "epoch": 1.917795566502463, "grad_norm": 0.06528618931770325, "learning_rate": 0.01, "loss": 2.0411, "step": 18687 }, { "epoch": 1.918103448275862, "grad_norm": 0.063084177672863, "learning_rate": 0.01, "loss": 2.0405, "step": 18690 }, { "epoch": 1.9184113300492611, "grad_norm": 0.06663991510868073, "learning_rate": 0.01, "loss": 2.0547, "step": 18693 }, { "epoch": 1.9187192118226601, "grad_norm": 0.09827464818954468, "learning_rate": 0.01, "loss": 2.0285, "step": 18696 }, { "epoch": 1.9190270935960592, "grad_norm": 0.052305273711681366, "learning_rate": 0.01, "loss": 2.0205, "step": 18699 }, { "epoch": 1.9193349753694582, "grad_norm": 0.07126889377832413, "learning_rate": 0.01, "loss": 2.0331, "step": 18702 }, { "epoch": 1.9196428571428572, "grad_norm": 0.06262009590864182, "learning_rate": 0.01, "loss": 2.0217, "step": 18705 }, { "epoch": 1.9199507389162562, "grad_norm": 0.056034356355667114, "learning_rate": 0.01, "loss": 2.0136, "step": 18708 }, { "epoch": 1.9202586206896552, "grad_norm": 0.07673577219247818, "learning_rate": 0.01, "loss": 2.0617, "step": 18711 }, { "epoch": 1.9205665024630543, "grad_norm": 0.06006854772567749, "learning_rate": 0.01, "loss": 2.0493, "step": 18714 }, { "epoch": 1.9208743842364533, "grad_norm": 0.07149071991443634, "learning_rate": 0.01, "loss": 2.0339, "step": 18717 }, { "epoch": 1.9211822660098523, "grad_norm": 0.04935576766729355, "learning_rate": 0.01, "loss": 2.0228, "step": 18720 }, { "epoch": 1.9214901477832513, "grad_norm": 0.1052050068974495, "learning_rate": 0.01, "loss": 2.0384, "step": 18723 }, { "epoch": 1.9217980295566504, "grad_norm": 0.07589028030633926, "learning_rate": 0.01, "loss": 2.0349, "step": 18726 }, { "epoch": 1.9221059113300494, "grad_norm": 0.0862005278468132, "learning_rate": 0.01, "loss": 2.0357, "step": 18729 }, { "epoch": 1.9224137931034484, "grad_norm": 0.07210662961006165, "learning_rate": 0.01, "loss": 2.0344, "step": 18732 }, { "epoch": 1.9227216748768474, "grad_norm": 0.0924825370311737, "learning_rate": 0.01, "loss": 2.0069, "step": 18735 }, { "epoch": 1.9230295566502464, "grad_norm": 0.05819706991314888, "learning_rate": 0.01, "loss": 2.0266, "step": 18738 }, { "epoch": 1.9233374384236455, "grad_norm": 0.04784362018108368, "learning_rate": 0.01, "loss": 2.0633, "step": 18741 }, { "epoch": 1.9236453201970445, "grad_norm": 0.07216835021972656, "learning_rate": 0.01, "loss": 2.0107, "step": 18744 }, { "epoch": 1.9239532019704435, "grad_norm": 0.05539752170443535, "learning_rate": 0.01, "loss": 2.0222, "step": 18747 }, { "epoch": 1.9242610837438425, "grad_norm": 0.07037390768527985, "learning_rate": 0.01, "loss": 2.0331, "step": 18750 }, { "epoch": 1.9245689655172413, "grad_norm": 0.0941152572631836, "learning_rate": 0.01, "loss": 2.0432, "step": 18753 }, { "epoch": 1.9248768472906403, "grad_norm": 0.05315488949418068, "learning_rate": 0.01, "loss": 2.036, "step": 18756 }, { "epoch": 1.9251847290640394, "grad_norm": 0.04986554756760597, "learning_rate": 0.01, "loss": 2.0249, "step": 18759 }, { "epoch": 1.9254926108374384, "grad_norm": 0.0750490128993988, "learning_rate": 0.01, "loss": 2.0448, "step": 18762 }, { "epoch": 1.9258004926108374, "grad_norm": 0.13903938233852386, "learning_rate": 0.01, "loss": 2.0433, "step": 18765 }, { "epoch": 1.9261083743842364, "grad_norm": 0.08733932673931122, "learning_rate": 0.01, "loss": 2.0223, "step": 18768 }, { "epoch": 1.9264162561576355, "grad_norm": 0.04527903348207474, "learning_rate": 0.01, "loss": 2.0112, "step": 18771 }, { "epoch": 1.9267241379310345, "grad_norm": 0.036207813769578934, "learning_rate": 0.01, "loss": 2.0353, "step": 18774 }, { "epoch": 1.9270320197044335, "grad_norm": 0.04572034999728203, "learning_rate": 0.01, "loss": 2.0388, "step": 18777 }, { "epoch": 1.9273399014778325, "grad_norm": 0.03662864491343498, "learning_rate": 0.01, "loss": 2.0023, "step": 18780 }, { "epoch": 1.9276477832512315, "grad_norm": 0.12049257755279541, "learning_rate": 0.01, "loss": 2.027, "step": 18783 }, { "epoch": 1.9279556650246306, "grad_norm": 0.1005631759762764, "learning_rate": 0.01, "loss": 2.0537, "step": 18786 }, { "epoch": 1.9282635467980296, "grad_norm": 0.13943985104560852, "learning_rate": 0.01, "loss": 2.0129, "step": 18789 }, { "epoch": 1.9285714285714286, "grad_norm": 0.13312341272830963, "learning_rate": 0.01, "loss": 2.0196, "step": 18792 }, { "epoch": 1.9288793103448276, "grad_norm": 0.0517788864672184, "learning_rate": 0.01, "loss": 2.0291, "step": 18795 }, { "epoch": 1.9291871921182266, "grad_norm": 0.05525217577815056, "learning_rate": 0.01, "loss": 2.0707, "step": 18798 }, { "epoch": 1.9294950738916257, "grad_norm": 0.04876135662198067, "learning_rate": 0.01, "loss": 2.0441, "step": 18801 }, { "epoch": 1.9298029556650245, "grad_norm": 0.04944787919521332, "learning_rate": 0.01, "loss": 2.0479, "step": 18804 }, { "epoch": 1.9301108374384235, "grad_norm": 0.06437812745571136, "learning_rate": 0.01, "loss": 2.0316, "step": 18807 }, { "epoch": 1.9304187192118225, "grad_norm": 0.04027709737420082, "learning_rate": 0.01, "loss": 2.0443, "step": 18810 }, { "epoch": 1.9307266009852215, "grad_norm": 0.05178974196314812, "learning_rate": 0.01, "loss": 2.0127, "step": 18813 }, { "epoch": 1.9310344827586206, "grad_norm": 0.05347009375691414, "learning_rate": 0.01, "loss": 2.0426, "step": 18816 }, { "epoch": 1.9313423645320196, "grad_norm": 0.03055960312485695, "learning_rate": 0.01, "loss": 2.0369, "step": 18819 }, { "epoch": 1.9316502463054186, "grad_norm": 0.10861945152282715, "learning_rate": 0.01, "loss": 2.0398, "step": 18822 }, { "epoch": 1.9319581280788176, "grad_norm": 0.05932777374982834, "learning_rate": 0.01, "loss": 2.0272, "step": 18825 }, { "epoch": 1.9322660098522166, "grad_norm": 0.046545740216970444, "learning_rate": 0.01, "loss": 2.0171, "step": 18828 }, { "epoch": 1.9325738916256157, "grad_norm": 0.07582221925258636, "learning_rate": 0.01, "loss": 2.0239, "step": 18831 }, { "epoch": 1.9328817733990147, "grad_norm": 0.07975540310144424, "learning_rate": 0.01, "loss": 2.0073, "step": 18834 }, { "epoch": 1.9331896551724137, "grad_norm": 0.07365059852600098, "learning_rate": 0.01, "loss": 2.0188, "step": 18837 }, { "epoch": 1.9334975369458127, "grad_norm": 0.09160298854112625, "learning_rate": 0.01, "loss": 2.027, "step": 18840 }, { "epoch": 1.9338054187192117, "grad_norm": 0.07767198234796524, "learning_rate": 0.01, "loss": 2.0157, "step": 18843 }, { "epoch": 1.9341133004926108, "grad_norm": 0.07545919716358185, "learning_rate": 0.01, "loss": 2.0334, "step": 18846 }, { "epoch": 1.9344211822660098, "grad_norm": 0.06564575433731079, "learning_rate": 0.01, "loss": 2.0199, "step": 18849 }, { "epoch": 1.9347290640394088, "grad_norm": 0.04205799475312233, "learning_rate": 0.01, "loss": 2.0275, "step": 18852 }, { "epoch": 1.9350369458128078, "grad_norm": 0.07428024709224701, "learning_rate": 0.01, "loss": 2.0258, "step": 18855 }, { "epoch": 1.9353448275862069, "grad_norm": 0.08150817453861237, "learning_rate": 0.01, "loss": 2.0404, "step": 18858 }, { "epoch": 1.9356527093596059, "grad_norm": 0.07489453256130219, "learning_rate": 0.01, "loss": 2.0489, "step": 18861 }, { "epoch": 1.935960591133005, "grad_norm": 0.09983116388320923, "learning_rate": 0.01, "loss": 2.0101, "step": 18864 }, { "epoch": 1.936268472906404, "grad_norm": 0.09074544906616211, "learning_rate": 0.01, "loss": 2.0385, "step": 18867 }, { "epoch": 1.936576354679803, "grad_norm": 0.056056223809719086, "learning_rate": 0.01, "loss": 2.0531, "step": 18870 }, { "epoch": 1.936884236453202, "grad_norm": 0.04818575084209442, "learning_rate": 0.01, "loss": 2.029, "step": 18873 }, { "epoch": 1.937192118226601, "grad_norm": 0.04811173304915428, "learning_rate": 0.01, "loss": 2.0147, "step": 18876 }, { "epoch": 1.9375, "grad_norm": 0.05799747258424759, "learning_rate": 0.01, "loss": 2.0466, "step": 18879 }, { "epoch": 1.937807881773399, "grad_norm": 0.07357611507177353, "learning_rate": 0.01, "loss": 2.0404, "step": 18882 }, { "epoch": 1.938115763546798, "grad_norm": 0.10275068879127502, "learning_rate": 0.01, "loss": 2.0468, "step": 18885 }, { "epoch": 1.938423645320197, "grad_norm": 0.03685866296291351, "learning_rate": 0.01, "loss": 2.0165, "step": 18888 }, { "epoch": 1.938731527093596, "grad_norm": 0.03603344038128853, "learning_rate": 0.01, "loss": 2.0403, "step": 18891 }, { "epoch": 1.939039408866995, "grad_norm": 0.06932532042264938, "learning_rate": 0.01, "loss": 2.0359, "step": 18894 }, { "epoch": 1.9393472906403941, "grad_norm": 0.05983889847993851, "learning_rate": 0.01, "loss": 2.0299, "step": 18897 }, { "epoch": 1.9396551724137931, "grad_norm": 0.1199260875582695, "learning_rate": 0.01, "loss": 2.0467, "step": 18900 }, { "epoch": 1.9399630541871922, "grad_norm": 0.06222264841198921, "learning_rate": 0.01, "loss": 2.0387, "step": 18903 }, { "epoch": 1.9402709359605912, "grad_norm": 0.0764993354678154, "learning_rate": 0.01, "loss": 2.0014, "step": 18906 }, { "epoch": 1.9405788177339902, "grad_norm": 0.04790098965167999, "learning_rate": 0.01, "loss": 2.0399, "step": 18909 }, { "epoch": 1.9408866995073892, "grad_norm": 0.03822425380349159, "learning_rate": 0.01, "loss": 2.0325, "step": 18912 }, { "epoch": 1.9411945812807883, "grad_norm": 0.05336176976561546, "learning_rate": 0.01, "loss": 2.0307, "step": 18915 }, { "epoch": 1.9415024630541873, "grad_norm": 0.08732246607542038, "learning_rate": 0.01, "loss": 2.0373, "step": 18918 }, { "epoch": 1.9418103448275863, "grad_norm": 0.08886411786079407, "learning_rate": 0.01, "loss": 2.0682, "step": 18921 }, { "epoch": 1.9421182266009853, "grad_norm": 0.08069706708192825, "learning_rate": 0.01, "loss": 2.0382, "step": 18924 }, { "epoch": 1.9424261083743843, "grad_norm": 0.08464798331260681, "learning_rate": 0.01, "loss": 2.0207, "step": 18927 }, { "epoch": 1.9427339901477834, "grad_norm": 0.07051963359117508, "learning_rate": 0.01, "loss": 2.0375, "step": 18930 }, { "epoch": 1.9430418719211824, "grad_norm": 0.037250157445669174, "learning_rate": 0.01, "loss": 2.0146, "step": 18933 }, { "epoch": 1.9433497536945814, "grad_norm": 0.07512888312339783, "learning_rate": 0.01, "loss": 2.0279, "step": 18936 }, { "epoch": 1.9436576354679804, "grad_norm": 0.12079732865095139, "learning_rate": 0.01, "loss": 2.0457, "step": 18939 }, { "epoch": 1.9439655172413794, "grad_norm": 0.0511600561439991, "learning_rate": 0.01, "loss": 2.0558, "step": 18942 }, { "epoch": 1.9442733990147785, "grad_norm": 0.06442293524742126, "learning_rate": 0.01, "loss": 2.07, "step": 18945 }, { "epoch": 1.9445812807881775, "grad_norm": 0.04928497597575188, "learning_rate": 0.01, "loss": 2.0088, "step": 18948 }, { "epoch": 1.9448891625615765, "grad_norm": 0.07882185280323029, "learning_rate": 0.01, "loss": 2.0192, "step": 18951 }, { "epoch": 1.9451970443349755, "grad_norm": 0.03649712726473808, "learning_rate": 0.01, "loss": 2.0319, "step": 18954 }, { "epoch": 1.9455049261083743, "grad_norm": 0.10654021054506302, "learning_rate": 0.01, "loss": 2.038, "step": 18957 }, { "epoch": 1.9458128078817734, "grad_norm": 0.09655455499887466, "learning_rate": 0.01, "loss": 2.0284, "step": 18960 }, { "epoch": 1.9461206896551724, "grad_norm": 0.06114486977458, "learning_rate": 0.01, "loss": 2.0578, "step": 18963 }, { "epoch": 1.9464285714285714, "grad_norm": 0.04167640954256058, "learning_rate": 0.01, "loss": 2.0229, "step": 18966 }, { "epoch": 1.9467364532019704, "grad_norm": 0.054138265550136566, "learning_rate": 0.01, "loss": 2.0439, "step": 18969 }, { "epoch": 1.9470443349753694, "grad_norm": 0.04728518798947334, "learning_rate": 0.01, "loss": 2.0485, "step": 18972 }, { "epoch": 1.9473522167487685, "grad_norm": 0.09992729872465134, "learning_rate": 0.01, "loss": 2.0581, "step": 18975 }, { "epoch": 1.9476600985221675, "grad_norm": 0.039344482123851776, "learning_rate": 0.01, "loss": 2.0213, "step": 18978 }, { "epoch": 1.9479679802955665, "grad_norm": 0.10921066254377365, "learning_rate": 0.01, "loss": 2.0538, "step": 18981 }, { "epoch": 1.9482758620689655, "grad_norm": 0.03921816125512123, "learning_rate": 0.01, "loss": 2.011, "step": 18984 }, { "epoch": 1.9485837438423645, "grad_norm": 0.08293361961841583, "learning_rate": 0.01, "loss": 2.0186, "step": 18987 }, { "epoch": 1.9488916256157636, "grad_norm": 0.08310680091381073, "learning_rate": 0.01, "loss": 2.065, "step": 18990 }, { "epoch": 1.9491995073891626, "grad_norm": 0.05105976015329361, "learning_rate": 0.01, "loss": 2.0461, "step": 18993 }, { "epoch": 1.9495073891625616, "grad_norm": 0.0387946255505085, "learning_rate": 0.01, "loss": 2.0227, "step": 18996 }, { "epoch": 1.9498152709359606, "grad_norm": 0.039592646062374115, "learning_rate": 0.01, "loss": 2.0409, "step": 18999 }, { "epoch": 1.9501231527093597, "grad_norm": 0.042499393224716187, "learning_rate": 0.01, "loss": 2.0388, "step": 19002 }, { "epoch": 1.9504310344827587, "grad_norm": 0.1202671155333519, "learning_rate": 0.01, "loss": 2.0185, "step": 19005 }, { "epoch": 1.9507389162561575, "grad_norm": 0.05047677457332611, "learning_rate": 0.01, "loss": 2.0534, "step": 19008 }, { "epoch": 1.9510467980295565, "grad_norm": 0.13210178911685944, "learning_rate": 0.01, "loss": 2.0502, "step": 19011 }, { "epoch": 1.9513546798029555, "grad_norm": 0.07093524187803268, "learning_rate": 0.01, "loss": 2.0426, "step": 19014 }, { "epoch": 1.9516625615763545, "grad_norm": 0.05528571456670761, "learning_rate": 0.01, "loss": 2.0348, "step": 19017 }, { "epoch": 1.9519704433497536, "grad_norm": 0.08988847583532333, "learning_rate": 0.01, "loss": 2.0214, "step": 19020 }, { "epoch": 1.9522783251231526, "grad_norm": 0.05767255648970604, "learning_rate": 0.01, "loss": 2.0335, "step": 19023 }, { "epoch": 1.9525862068965516, "grad_norm": 0.07641880214214325, "learning_rate": 0.01, "loss": 2.0278, "step": 19026 }, { "epoch": 1.9528940886699506, "grad_norm": 0.08751394599676132, "learning_rate": 0.01, "loss": 2.0298, "step": 19029 }, { "epoch": 1.9532019704433496, "grad_norm": 0.06144971400499344, "learning_rate": 0.01, "loss": 2.0433, "step": 19032 }, { "epoch": 1.9535098522167487, "grad_norm": 0.04502955824136734, "learning_rate": 0.01, "loss": 2.0442, "step": 19035 }, { "epoch": 1.9538177339901477, "grad_norm": 0.05031814053654671, "learning_rate": 0.01, "loss": 2.0491, "step": 19038 }, { "epoch": 1.9541256157635467, "grad_norm": 0.08885148912668228, "learning_rate": 0.01, "loss": 2.043, "step": 19041 }, { "epoch": 1.9544334975369457, "grad_norm": 0.05610232427716255, "learning_rate": 0.01, "loss": 2.0515, "step": 19044 }, { "epoch": 1.9547413793103448, "grad_norm": 0.07169227302074432, "learning_rate": 0.01, "loss": 2.0248, "step": 19047 }, { "epoch": 1.9550492610837438, "grad_norm": 0.07136573642492294, "learning_rate": 0.01, "loss": 2.0344, "step": 19050 }, { "epoch": 1.9553571428571428, "grad_norm": 0.07761941850185394, "learning_rate": 0.01, "loss": 2.0358, "step": 19053 }, { "epoch": 1.9556650246305418, "grad_norm": 0.062269363552331924, "learning_rate": 0.01, "loss": 2.0169, "step": 19056 }, { "epoch": 1.9559729064039408, "grad_norm": 0.08797021210193634, "learning_rate": 0.01, "loss": 1.9928, "step": 19059 }, { "epoch": 1.9562807881773399, "grad_norm": 0.04401189833879471, "learning_rate": 0.01, "loss": 2.0615, "step": 19062 }, { "epoch": 1.9565886699507389, "grad_norm": 0.08460985869169235, "learning_rate": 0.01, "loss": 2.0126, "step": 19065 }, { "epoch": 1.956896551724138, "grad_norm": 0.07027498632669449, "learning_rate": 0.01, "loss": 2.0038, "step": 19068 }, { "epoch": 1.957204433497537, "grad_norm": 0.06747457385063171, "learning_rate": 0.01, "loss": 2.0089, "step": 19071 }, { "epoch": 1.957512315270936, "grad_norm": 0.10890615731477737, "learning_rate": 0.01, "loss": 2.0418, "step": 19074 }, { "epoch": 1.957820197044335, "grad_norm": 0.1049666553735733, "learning_rate": 0.01, "loss": 2.0683, "step": 19077 }, { "epoch": 1.958128078817734, "grad_norm": 0.04320647194981575, "learning_rate": 0.01, "loss": 2.0371, "step": 19080 }, { "epoch": 1.958435960591133, "grad_norm": 0.06038364768028259, "learning_rate": 0.01, "loss": 2.0417, "step": 19083 }, { "epoch": 1.958743842364532, "grad_norm": 0.04486788064241409, "learning_rate": 0.01, "loss": 2.0153, "step": 19086 }, { "epoch": 1.959051724137931, "grad_norm": 0.045702993869781494, "learning_rate": 0.01, "loss": 2.0447, "step": 19089 }, { "epoch": 1.95935960591133, "grad_norm": 0.10784240067005157, "learning_rate": 0.01, "loss": 2.0231, "step": 19092 }, { "epoch": 1.959667487684729, "grad_norm": 0.07740773260593414, "learning_rate": 0.01, "loss": 2.079, "step": 19095 }, { "epoch": 1.9599753694581281, "grad_norm": 0.0517832413315773, "learning_rate": 0.01, "loss": 2.061, "step": 19098 }, { "epoch": 1.9602832512315271, "grad_norm": 0.04660172387957573, "learning_rate": 0.01, "loss": 2.026, "step": 19101 }, { "epoch": 1.9605911330049262, "grad_norm": 0.084842748939991, "learning_rate": 0.01, "loss": 2.0418, "step": 19104 }, { "epoch": 1.9608990147783252, "grad_norm": 0.10866342484951019, "learning_rate": 0.01, "loss": 2.0361, "step": 19107 }, { "epoch": 1.9612068965517242, "grad_norm": 0.060094647109508514, "learning_rate": 0.01, "loss": 2.0207, "step": 19110 }, { "epoch": 1.9615147783251232, "grad_norm": 0.04082890599966049, "learning_rate": 0.01, "loss": 1.9837, "step": 19113 }, { "epoch": 1.9618226600985222, "grad_norm": 0.09193670004606247, "learning_rate": 0.01, "loss": 2.045, "step": 19116 }, { "epoch": 1.9621305418719213, "grad_norm": 0.049036990851163864, "learning_rate": 0.01, "loss": 2.0014, "step": 19119 }, { "epoch": 1.9624384236453203, "grad_norm": 0.06658133864402771, "learning_rate": 0.01, "loss": 2.0256, "step": 19122 }, { "epoch": 1.9627463054187193, "grad_norm": 0.07290081679821014, "learning_rate": 0.01, "loss": 2.022, "step": 19125 }, { "epoch": 1.9630541871921183, "grad_norm": 0.05635548382997513, "learning_rate": 0.01, "loss": 2.0439, "step": 19128 }, { "epoch": 1.9633620689655173, "grad_norm": 0.07143761217594147, "learning_rate": 0.01, "loss": 2.009, "step": 19131 }, { "epoch": 1.9636699507389164, "grad_norm": 0.15296097099781036, "learning_rate": 0.01, "loss": 2.0539, "step": 19134 }, { "epoch": 1.9639778325123154, "grad_norm": 0.1795274019241333, "learning_rate": 0.01, "loss": 2.0418, "step": 19137 }, { "epoch": 1.9642857142857144, "grad_norm": 0.04691818729043007, "learning_rate": 0.01, "loss": 2.0114, "step": 19140 }, { "epoch": 1.9645935960591134, "grad_norm": 0.05018999055027962, "learning_rate": 0.01, "loss": 2.0558, "step": 19143 }, { "epoch": 1.9649014778325125, "grad_norm": 0.0349762961268425, "learning_rate": 0.01, "loss": 2.0409, "step": 19146 }, { "epoch": 1.9652093596059115, "grad_norm": 0.04055612534284592, "learning_rate": 0.01, "loss": 2.033, "step": 19149 }, { "epoch": 1.9655172413793105, "grad_norm": 0.04818587005138397, "learning_rate": 0.01, "loss": 2.0522, "step": 19152 }, { "epoch": 1.9658251231527095, "grad_norm": 0.03579457104206085, "learning_rate": 0.01, "loss": 2.0295, "step": 19155 }, { "epoch": 1.9661330049261085, "grad_norm": 0.04382238909602165, "learning_rate": 0.01, "loss": 2.0011, "step": 19158 }, { "epoch": 1.9664408866995073, "grad_norm": 0.03784547746181488, "learning_rate": 0.01, "loss": 2.0332, "step": 19161 }, { "epoch": 1.9667487684729064, "grad_norm": 0.049413155764341354, "learning_rate": 0.01, "loss": 2.0276, "step": 19164 }, { "epoch": 1.9670566502463054, "grad_norm": 0.10560319572687149, "learning_rate": 0.01, "loss": 2.012, "step": 19167 }, { "epoch": 1.9673645320197044, "grad_norm": 0.07912679761648178, "learning_rate": 0.01, "loss": 2.0233, "step": 19170 }, { "epoch": 1.9676724137931034, "grad_norm": 0.051868222653865814, "learning_rate": 0.01, "loss": 2.0399, "step": 19173 }, { "epoch": 1.9679802955665024, "grad_norm": 0.09925144910812378, "learning_rate": 0.01, "loss": 2.0382, "step": 19176 }, { "epoch": 1.9682881773399015, "grad_norm": 0.09824500232934952, "learning_rate": 0.01, "loss": 2.022, "step": 19179 }, { "epoch": 1.9685960591133005, "grad_norm": 0.04710378497838974, "learning_rate": 0.01, "loss": 2.018, "step": 19182 }, { "epoch": 1.9689039408866995, "grad_norm": 0.09339728951454163, "learning_rate": 0.01, "loss": 2.0623, "step": 19185 }, { "epoch": 1.9692118226600985, "grad_norm": 0.04485667496919632, "learning_rate": 0.01, "loss": 2.0361, "step": 19188 }, { "epoch": 1.9695197044334976, "grad_norm": 0.06367155909538269, "learning_rate": 0.01, "loss": 2.0269, "step": 19191 }, { "epoch": 1.9698275862068966, "grad_norm": 0.06692302227020264, "learning_rate": 0.01, "loss": 2.0475, "step": 19194 }, { "epoch": 1.9701354679802956, "grad_norm": 0.06107610464096069, "learning_rate": 0.01, "loss": 2.046, "step": 19197 }, { "epoch": 1.9704433497536946, "grad_norm": 0.06362861394882202, "learning_rate": 0.01, "loss": 2.0515, "step": 19200 }, { "epoch": 1.9707512315270936, "grad_norm": 0.07524324208498001, "learning_rate": 0.01, "loss": 2.044, "step": 19203 }, { "epoch": 1.9710591133004927, "grad_norm": 0.09118182212114334, "learning_rate": 0.01, "loss": 2.0501, "step": 19206 }, { "epoch": 1.9713669950738915, "grad_norm": 0.0823112204670906, "learning_rate": 0.01, "loss": 2.0305, "step": 19209 }, { "epoch": 1.9716748768472905, "grad_norm": 0.061318982392549515, "learning_rate": 0.01, "loss": 2.0499, "step": 19212 }, { "epoch": 1.9719827586206895, "grad_norm": 0.09838750958442688, "learning_rate": 0.01, "loss": 2.0211, "step": 19215 }, { "epoch": 1.9722906403940885, "grad_norm": 0.061727046966552734, "learning_rate": 0.01, "loss": 2.0671, "step": 19218 }, { "epoch": 1.9725985221674875, "grad_norm": 0.044177260249853134, "learning_rate": 0.01, "loss": 2.0429, "step": 19221 }, { "epoch": 1.9729064039408866, "grad_norm": 0.031012659892439842, "learning_rate": 0.01, "loss": 2.0204, "step": 19224 }, { "epoch": 1.9732142857142856, "grad_norm": 0.0593150295317173, "learning_rate": 0.01, "loss": 2.0418, "step": 19227 }, { "epoch": 1.9735221674876846, "grad_norm": 0.09283222258090973, "learning_rate": 0.01, "loss": 2.0363, "step": 19230 }, { "epoch": 1.9738300492610836, "grad_norm": 0.07416541129350662, "learning_rate": 0.01, "loss": 2.0101, "step": 19233 }, { "epoch": 1.9741379310344827, "grad_norm": 0.08513590693473816, "learning_rate": 0.01, "loss": 2.0284, "step": 19236 }, { "epoch": 1.9744458128078817, "grad_norm": 0.08401728421449661, "learning_rate": 0.01, "loss": 2.0356, "step": 19239 }, { "epoch": 1.9747536945812807, "grad_norm": 0.08488047868013382, "learning_rate": 0.01, "loss": 2.0408, "step": 19242 }, { "epoch": 1.9750615763546797, "grad_norm": 0.11438726633787155, "learning_rate": 0.01, "loss": 2.0439, "step": 19245 }, { "epoch": 1.9753694581280787, "grad_norm": 0.0416182205080986, "learning_rate": 0.01, "loss": 2.0034, "step": 19248 }, { "epoch": 1.9756773399014778, "grad_norm": 0.046806883066892624, "learning_rate": 0.01, "loss": 2.0307, "step": 19251 }, { "epoch": 1.9759852216748768, "grad_norm": 0.04319307208061218, "learning_rate": 0.01, "loss": 2.0404, "step": 19254 }, { "epoch": 1.9762931034482758, "grad_norm": 0.11832991987466812, "learning_rate": 0.01, "loss": 2.0338, "step": 19257 }, { "epoch": 1.9766009852216748, "grad_norm": 0.04716213047504425, "learning_rate": 0.01, "loss": 2.005, "step": 19260 }, { "epoch": 1.9769088669950738, "grad_norm": 0.08626002073287964, "learning_rate": 0.01, "loss": 2.0178, "step": 19263 }, { "epoch": 1.9772167487684729, "grad_norm": 0.0981634259223938, "learning_rate": 0.01, "loss": 2.0502, "step": 19266 }, { "epoch": 1.9775246305418719, "grad_norm": 0.0657229796051979, "learning_rate": 0.01, "loss": 2.065, "step": 19269 }, { "epoch": 1.977832512315271, "grad_norm": 0.0652332603931427, "learning_rate": 0.01, "loss": 2.0395, "step": 19272 }, { "epoch": 1.97814039408867, "grad_norm": 0.06810397654771805, "learning_rate": 0.01, "loss": 2.0418, "step": 19275 }, { "epoch": 1.978448275862069, "grad_norm": 0.04740637540817261, "learning_rate": 0.01, "loss": 2.0456, "step": 19278 }, { "epoch": 1.978756157635468, "grad_norm": 0.039233241230249405, "learning_rate": 0.01, "loss": 2.0348, "step": 19281 }, { "epoch": 1.979064039408867, "grad_norm": 0.07533819228410721, "learning_rate": 0.01, "loss": 2.0411, "step": 19284 }, { "epoch": 1.979371921182266, "grad_norm": 0.0820235162973404, "learning_rate": 0.01, "loss": 2.0299, "step": 19287 }, { "epoch": 1.979679802955665, "grad_norm": 0.057419124990701675, "learning_rate": 0.01, "loss": 2.0692, "step": 19290 }, { "epoch": 1.979987684729064, "grad_norm": 0.10119790583848953, "learning_rate": 0.01, "loss": 2.0752, "step": 19293 }, { "epoch": 1.980295566502463, "grad_norm": 0.116152822971344, "learning_rate": 0.01, "loss": 2.0377, "step": 19296 }, { "epoch": 1.980603448275862, "grad_norm": 0.05364501103758812, "learning_rate": 0.01, "loss": 2.0118, "step": 19299 }, { "epoch": 1.9809113300492611, "grad_norm": 0.09089913219213486, "learning_rate": 0.01, "loss": 2.0445, "step": 19302 }, { "epoch": 1.9812192118226601, "grad_norm": 0.06570890545845032, "learning_rate": 0.01, "loss": 2.048, "step": 19305 }, { "epoch": 1.9815270935960592, "grad_norm": 0.10739763081073761, "learning_rate": 0.01, "loss": 2.0527, "step": 19308 }, { "epoch": 1.9818349753694582, "grad_norm": 0.0396854430437088, "learning_rate": 0.01, "loss": 2.0414, "step": 19311 }, { "epoch": 1.9821428571428572, "grad_norm": 0.11273244023323059, "learning_rate": 0.01, "loss": 2.0447, "step": 19314 }, { "epoch": 1.9824507389162562, "grad_norm": 0.10009465366601944, "learning_rate": 0.01, "loss": 2.0512, "step": 19317 }, { "epoch": 1.9827586206896552, "grad_norm": 0.053756825625896454, "learning_rate": 0.01, "loss": 2.0731, "step": 19320 }, { "epoch": 1.9830665024630543, "grad_norm": 0.06603456288576126, "learning_rate": 0.01, "loss": 2.0399, "step": 19323 }, { "epoch": 1.9833743842364533, "grad_norm": 0.038810715079307556, "learning_rate": 0.01, "loss": 2.0272, "step": 19326 }, { "epoch": 1.9836822660098523, "grad_norm": 0.04284658655524254, "learning_rate": 0.01, "loss": 2.035, "step": 19329 }, { "epoch": 1.9839901477832513, "grad_norm": 0.04441271349787712, "learning_rate": 0.01, "loss": 2.0448, "step": 19332 }, { "epoch": 1.9842980295566504, "grad_norm": 0.04501213878393173, "learning_rate": 0.01, "loss": 2.0517, "step": 19335 }, { "epoch": 1.9846059113300494, "grad_norm": 0.05109642818570137, "learning_rate": 0.01, "loss": 2.0168, "step": 19338 }, { "epoch": 1.9849137931034484, "grad_norm": 0.03543083369731903, "learning_rate": 0.01, "loss": 2.0396, "step": 19341 }, { "epoch": 1.9852216748768474, "grad_norm": 0.04665149003267288, "learning_rate": 0.01, "loss": 2.0285, "step": 19344 }, { "epoch": 1.9855295566502464, "grad_norm": 0.035318441689014435, "learning_rate": 0.01, "loss": 2.0321, "step": 19347 }, { "epoch": 1.9858374384236455, "grad_norm": 0.035862043499946594, "learning_rate": 0.01, "loss": 2.0461, "step": 19350 }, { "epoch": 1.9861453201970445, "grad_norm": 0.128739133477211, "learning_rate": 0.01, "loss": 2.0561, "step": 19353 }, { "epoch": 1.9864532019704435, "grad_norm": 0.08115250617265701, "learning_rate": 0.01, "loss": 2.0364, "step": 19356 }, { "epoch": 1.9867610837438425, "grad_norm": 0.04203096404671669, "learning_rate": 0.01, "loss": 2.0298, "step": 19359 }, { "epoch": 1.9870689655172413, "grad_norm": 0.03801970183849335, "learning_rate": 0.01, "loss": 2.034, "step": 19362 }, { "epoch": 1.9873768472906403, "grad_norm": 0.05322232097387314, "learning_rate": 0.01, "loss": 2.0519, "step": 19365 }, { "epoch": 1.9876847290640394, "grad_norm": 0.037100568413734436, "learning_rate": 0.01, "loss": 2.0087, "step": 19368 }, { "epoch": 1.9879926108374384, "grad_norm": 0.03714398667216301, "learning_rate": 0.01, "loss": 2.0187, "step": 19371 }, { "epoch": 1.9883004926108374, "grad_norm": 0.050371263176202774, "learning_rate": 0.01, "loss": 2.0318, "step": 19374 }, { "epoch": 1.9886083743842364, "grad_norm": 0.03875119984149933, "learning_rate": 0.01, "loss": 2.0224, "step": 19377 }, { "epoch": 1.9889162561576355, "grad_norm": 0.06838756054639816, "learning_rate": 0.01, "loss": 2.0458, "step": 19380 }, { "epoch": 1.9892241379310345, "grad_norm": 0.04749476909637451, "learning_rate": 0.01, "loss": 2.022, "step": 19383 }, { "epoch": 1.9895320197044335, "grad_norm": 0.041247084736824036, "learning_rate": 0.01, "loss": 2.0088, "step": 19386 }, { "epoch": 1.9898399014778325, "grad_norm": 0.08582460135221481, "learning_rate": 0.01, "loss": 2.0061, "step": 19389 }, { "epoch": 1.9901477832512315, "grad_norm": 0.042033273726701736, "learning_rate": 0.01, "loss": 2.0516, "step": 19392 }, { "epoch": 1.9904556650246306, "grad_norm": 0.08395756036043167, "learning_rate": 0.01, "loss": 2.0338, "step": 19395 }, { "epoch": 1.9907635467980296, "grad_norm": 0.07154903560876846, "learning_rate": 0.01, "loss": 2.0168, "step": 19398 }, { "epoch": 1.9910714285714286, "grad_norm": 0.06137581169605255, "learning_rate": 0.01, "loss": 2.0046, "step": 19401 }, { "epoch": 1.9913793103448276, "grad_norm": 0.1226835623383522, "learning_rate": 0.01, "loss": 2.0314, "step": 19404 }, { "epoch": 1.9916871921182266, "grad_norm": 0.06524399667978287, "learning_rate": 0.01, "loss": 2.0581, "step": 19407 }, { "epoch": 1.9919950738916257, "grad_norm": 0.060310300439596176, "learning_rate": 0.01, "loss": 2.0205, "step": 19410 }, { "epoch": 1.9923029556650245, "grad_norm": 0.10605314373970032, "learning_rate": 0.01, "loss": 2.0461, "step": 19413 }, { "epoch": 1.9926108374384235, "grad_norm": 0.07056690007448196, "learning_rate": 0.01, "loss": 2.0603, "step": 19416 }, { "epoch": 1.9929187192118225, "grad_norm": 0.04367789626121521, "learning_rate": 0.01, "loss": 2.0358, "step": 19419 }, { "epoch": 1.9932266009852215, "grad_norm": 0.07856806367635727, "learning_rate": 0.01, "loss": 2.061, "step": 19422 }, { "epoch": 1.9935344827586206, "grad_norm": 0.07237541675567627, "learning_rate": 0.01, "loss": 2.0218, "step": 19425 }, { "epoch": 1.9938423645320196, "grad_norm": 0.04983443021774292, "learning_rate": 0.01, "loss": 2.0204, "step": 19428 }, { "epoch": 1.9941502463054186, "grad_norm": 0.049045633524656296, "learning_rate": 0.01, "loss": 2.0089, "step": 19431 }, { "epoch": 1.9944581280788176, "grad_norm": 0.07521536946296692, "learning_rate": 0.01, "loss": 2.021, "step": 19434 }, { "epoch": 1.9947660098522166, "grad_norm": 0.03521602228283882, "learning_rate": 0.01, "loss": 2.0386, "step": 19437 }, { "epoch": 1.9950738916256157, "grad_norm": 0.06075441092252731, "learning_rate": 0.01, "loss": 2.045, "step": 19440 }, { "epoch": 1.9953817733990147, "grad_norm": 0.08800282329320908, "learning_rate": 0.01, "loss": 2.0511, "step": 19443 }, { "epoch": 1.9956896551724137, "grad_norm": 0.04632639139890671, "learning_rate": 0.01, "loss": 2.0434, "step": 19446 }, { "epoch": 1.9959975369458127, "grad_norm": 0.05275778844952583, "learning_rate": 0.01, "loss": 2.0036, "step": 19449 }, { "epoch": 1.9963054187192117, "grad_norm": 0.04615132138133049, "learning_rate": 0.01, "loss": 1.9958, "step": 19452 }, { "epoch": 1.9966133004926108, "grad_norm": 0.12586715817451477, "learning_rate": 0.01, "loss": 2.0311, "step": 19455 }, { "epoch": 1.9969211822660098, "grad_norm": 0.09406362473964691, "learning_rate": 0.01, "loss": 2.028, "step": 19458 }, { "epoch": 1.9972290640394088, "grad_norm": 0.032408129423856735, "learning_rate": 0.01, "loss": 2.0311, "step": 19461 }, { "epoch": 1.9975369458128078, "grad_norm": 0.08810164034366608, "learning_rate": 0.01, "loss": 2.0364, "step": 19464 }, { "epoch": 1.9978448275862069, "grad_norm": 0.11263968795537949, "learning_rate": 0.01, "loss": 2.0263, "step": 19467 }, { "epoch": 1.9981527093596059, "grad_norm": 0.06618282198905945, "learning_rate": 0.01, "loss": 2.0435, "step": 19470 }, { "epoch": 1.998460591133005, "grad_norm": 0.03649067133665085, "learning_rate": 0.01, "loss": 2.0131, "step": 19473 }, { "epoch": 1.998768472906404, "grad_norm": 0.03718538209795952, "learning_rate": 0.01, "loss": 2.0517, "step": 19476 }, { "epoch": 1.999076354679803, "grad_norm": 0.047908563166856766, "learning_rate": 0.01, "loss": 2.0162, "step": 19479 }, { "epoch": 1.999384236453202, "grad_norm": 0.04926212877035141, "learning_rate": 0.01, "loss": 2.0401, "step": 19482 }, { "epoch": 1.999692118226601, "grad_norm": 0.08558470755815506, "learning_rate": 0.01, "loss": 2.0611, "step": 19485 }, { "epoch": 2.0, "grad_norm": 0.07099032402038574, "learning_rate": 0.01, "loss": 2.0408, "step": 19488 }, { "epoch": 2.003391921060746, "grad_norm": 0.08354249596595764, "learning_rate": 0.01, "loss": 2.065, "step": 19491 }, { "epoch": 2.003700277520814, "grad_norm": 0.09245558828115463, "learning_rate": 0.01, "loss": 2.0571, "step": 19494 }, { "epoch": 2.004008633980882, "grad_norm": 0.09228463470935822, "learning_rate": 0.01, "loss": 2.0749, "step": 19497 }, { "epoch": 2.00431699044095, "grad_norm": 0.05558445304632187, "learning_rate": 0.01, "loss": 2.0754, "step": 19500 }, { "epoch": 2.0046253469010176, "grad_norm": 0.04827789589762688, "learning_rate": 0.01, "loss": 2.0431, "step": 19503 }, { "epoch": 2.0049337033610852, "grad_norm": 0.063465915620327, "learning_rate": 0.01, "loss": 2.067, "step": 19506 }, { "epoch": 2.0052420598211533, "grad_norm": 0.047648850828409195, "learning_rate": 0.01, "loss": 2.0614, "step": 19509 }, { "epoch": 2.005550416281221, "grad_norm": 0.038050852715969086, "learning_rate": 0.01, "loss": 2.0699, "step": 19512 }, { "epoch": 2.005858772741289, "grad_norm": 0.036580201238393784, "learning_rate": 0.01, "loss": 2.0832, "step": 19515 }, { "epoch": 2.0061671292013568, "grad_norm": 0.033919982612133026, "learning_rate": 0.01, "loss": 2.0492, "step": 19518 }, { "epoch": 2.0064754856614244, "grad_norm": 0.05007147789001465, "learning_rate": 0.01, "loss": 2.0662, "step": 19521 }, { "epoch": 2.0067838421214925, "grad_norm": 0.042562540620565414, "learning_rate": 0.01, "loss": 2.0508, "step": 19524 }, { "epoch": 2.00709219858156, "grad_norm": 0.0872044712305069, "learning_rate": 0.01, "loss": 2.0628, "step": 19527 }, { "epoch": 2.0074005550416283, "grad_norm": 0.06331472843885422, "learning_rate": 0.01, "loss": 2.0479, "step": 19530 }, { "epoch": 2.007708911501696, "grad_norm": 0.08928905427455902, "learning_rate": 0.01, "loss": 2.051, "step": 19533 }, { "epoch": 2.0080172679617636, "grad_norm": 0.0869508758187294, "learning_rate": 0.01, "loss": 2.0497, "step": 19536 }, { "epoch": 2.0083256244218317, "grad_norm": 0.04267793521285057, "learning_rate": 0.01, "loss": 2.0457, "step": 19539 }, { "epoch": 2.0086339808818994, "grad_norm": 0.046922095119953156, "learning_rate": 0.01, "loss": 2.0488, "step": 19542 }, { "epoch": 2.0089423373419675, "grad_norm": 0.07374055683612823, "learning_rate": 0.01, "loss": 2.0571, "step": 19545 }, { "epoch": 2.009250693802035, "grad_norm": 0.042078323662281036, "learning_rate": 0.01, "loss": 2.0501, "step": 19548 }, { "epoch": 2.009559050262103, "grad_norm": 0.052491504698991776, "learning_rate": 0.01, "loss": 2.0996, "step": 19551 }, { "epoch": 2.009867406722171, "grad_norm": 0.04900294169783592, "learning_rate": 0.01, "loss": 2.0476, "step": 19554 }, { "epoch": 2.0101757631822386, "grad_norm": 0.13067513704299927, "learning_rate": 0.01, "loss": 2.066, "step": 19557 }, { "epoch": 2.0104841196423067, "grad_norm": 0.09229371696710587, "learning_rate": 0.01, "loss": 2.0708, "step": 19560 }, { "epoch": 2.0107924761023743, "grad_norm": 0.05014317110180855, "learning_rate": 0.01, "loss": 2.0474, "step": 19563 }, { "epoch": 2.011100832562442, "grad_norm": 0.06385400146245956, "learning_rate": 0.01, "loss": 2.0427, "step": 19566 }, { "epoch": 2.01140918902251, "grad_norm": 0.04037034139037132, "learning_rate": 0.01, "loss": 2.0508, "step": 19569 }, { "epoch": 2.0117175454825778, "grad_norm": 0.02967817150056362, "learning_rate": 0.01, "loss": 2.0548, "step": 19572 }, { "epoch": 2.012025901942646, "grad_norm": 0.04519663751125336, "learning_rate": 0.01, "loss": 2.0497, "step": 19575 }, { "epoch": 2.0123342584027135, "grad_norm": 0.07825223356485367, "learning_rate": 0.01, "loss": 2.06, "step": 19578 }, { "epoch": 2.012642614862781, "grad_norm": 0.13088425993919373, "learning_rate": 0.01, "loss": 2.0546, "step": 19581 }, { "epoch": 2.0129509713228493, "grad_norm": 0.10113450884819031, "learning_rate": 0.01, "loss": 2.0628, "step": 19584 }, { "epoch": 2.013259327782917, "grad_norm": 0.06662772595882416, "learning_rate": 0.01, "loss": 2.0655, "step": 19587 }, { "epoch": 2.013567684242985, "grad_norm": 0.04824177175760269, "learning_rate": 0.01, "loss": 2.0853, "step": 19590 }, { "epoch": 2.0138760407030527, "grad_norm": 0.07255363464355469, "learning_rate": 0.01, "loss": 2.0614, "step": 19593 }, { "epoch": 2.0141843971631204, "grad_norm": 0.061763517558574677, "learning_rate": 0.01, "loss": 2.0761, "step": 19596 }, { "epoch": 2.0144927536231885, "grad_norm": 0.058266837149858475, "learning_rate": 0.01, "loss": 2.0495, "step": 19599 }, { "epoch": 2.014801110083256, "grad_norm": 0.05644237995147705, "learning_rate": 0.01, "loss": 2.0416, "step": 19602 }, { "epoch": 2.0151094665433242, "grad_norm": 0.102548748254776, "learning_rate": 0.01, "loss": 2.0581, "step": 19605 }, { "epoch": 2.015417823003392, "grad_norm": 0.06959159672260284, "learning_rate": 0.01, "loss": 2.0288, "step": 19608 }, { "epoch": 2.0157261794634596, "grad_norm": 0.09066049009561539, "learning_rate": 0.01, "loss": 2.045, "step": 19611 }, { "epoch": 2.0160345359235277, "grad_norm": 0.061073388904333115, "learning_rate": 0.01, "loss": 2.0523, "step": 19614 }, { "epoch": 2.0163428923835953, "grad_norm": 0.09000861644744873, "learning_rate": 0.01, "loss": 2.0501, "step": 19617 }, { "epoch": 2.0166512488436634, "grad_norm": 0.040078576654195786, "learning_rate": 0.01, "loss": 2.0409, "step": 19620 }, { "epoch": 2.016959605303731, "grad_norm": 0.045984748750925064, "learning_rate": 0.01, "loss": 2.0456, "step": 19623 }, { "epoch": 2.0172679617637987, "grad_norm": 0.045942965894937515, "learning_rate": 0.01, "loss": 2.0319, "step": 19626 }, { "epoch": 2.017576318223867, "grad_norm": 0.059133514761924744, "learning_rate": 0.01, "loss": 2.0504, "step": 19629 }, { "epoch": 2.0178846746839345, "grad_norm": 0.07386631518602371, "learning_rate": 0.01, "loss": 2.0367, "step": 19632 }, { "epoch": 2.0181930311440026, "grad_norm": 0.06906817853450775, "learning_rate": 0.01, "loss": 2.0491, "step": 19635 }, { "epoch": 2.0185013876040703, "grad_norm": 0.06259379535913467, "learning_rate": 0.01, "loss": 2.0812, "step": 19638 }, { "epoch": 2.018809744064138, "grad_norm": 0.07011371850967407, "learning_rate": 0.01, "loss": 2.0716, "step": 19641 }, { "epoch": 2.019118100524206, "grad_norm": 0.05763932690024376, "learning_rate": 0.01, "loss": 2.0618, "step": 19644 }, { "epoch": 2.0194264569842737, "grad_norm": 0.09810350090265274, "learning_rate": 0.01, "loss": 2.046, "step": 19647 }, { "epoch": 2.019734813444342, "grad_norm": 0.11434987187385559, "learning_rate": 0.01, "loss": 2.057, "step": 19650 }, { "epoch": 2.0200431699044095, "grad_norm": 0.10692505538463593, "learning_rate": 0.01, "loss": 2.0669, "step": 19653 }, { "epoch": 2.020351526364477, "grad_norm": 0.06918302178382874, "learning_rate": 0.01, "loss": 2.0636, "step": 19656 }, { "epoch": 2.0206598828245452, "grad_norm": 0.06661045551300049, "learning_rate": 0.01, "loss": 2.0366, "step": 19659 }, { "epoch": 2.020968239284613, "grad_norm": 0.03996479883790016, "learning_rate": 0.01, "loss": 2.0552, "step": 19662 }, { "epoch": 2.021276595744681, "grad_norm": 0.041359271854162216, "learning_rate": 0.01, "loss": 2.0542, "step": 19665 }, { "epoch": 2.0215849522047487, "grad_norm": 0.046693217009305954, "learning_rate": 0.01, "loss": 2.0448, "step": 19668 }, { "epoch": 2.0218933086648163, "grad_norm": 0.031555816531181335, "learning_rate": 0.01, "loss": 2.0146, "step": 19671 }, { "epoch": 2.0222016651248844, "grad_norm": 0.036573588848114014, "learning_rate": 0.01, "loss": 2.038, "step": 19674 }, { "epoch": 2.022510021584952, "grad_norm": 0.09064050763845444, "learning_rate": 0.01, "loss": 2.0609, "step": 19677 }, { "epoch": 2.02281837804502, "grad_norm": 0.11865704506635666, "learning_rate": 0.01, "loss": 2.0412, "step": 19680 }, { "epoch": 2.023126734505088, "grad_norm": 0.08720502257347107, "learning_rate": 0.01, "loss": 2.0403, "step": 19683 }, { "epoch": 2.0234350909651555, "grad_norm": 0.06953457742929459, "learning_rate": 0.01, "loss": 2.0769, "step": 19686 }, { "epoch": 2.0237434474252236, "grad_norm": 0.04386308416724205, "learning_rate": 0.01, "loss": 2.0595, "step": 19689 }, { "epoch": 2.0240518038852913, "grad_norm": 0.047490183264017105, "learning_rate": 0.01, "loss": 2.0478, "step": 19692 }, { "epoch": 2.0243601603453594, "grad_norm": 0.061406608670949936, "learning_rate": 0.01, "loss": 2.0582, "step": 19695 }, { "epoch": 2.024668516805427, "grad_norm": 0.0626315325498581, "learning_rate": 0.01, "loss": 2.0908, "step": 19698 }, { "epoch": 2.0249768732654947, "grad_norm": 0.048075366765260696, "learning_rate": 0.01, "loss": 2.0703, "step": 19701 }, { "epoch": 2.025285229725563, "grad_norm": 0.06243044510483742, "learning_rate": 0.01, "loss": 2.0452, "step": 19704 }, { "epoch": 2.0255935861856305, "grad_norm": 0.06498084217309952, "learning_rate": 0.01, "loss": 2.0812, "step": 19707 }, { "epoch": 2.0259019426456986, "grad_norm": 0.05091014504432678, "learning_rate": 0.01, "loss": 2.0451, "step": 19710 }, { "epoch": 2.026210299105766, "grad_norm": 0.04733705893158913, "learning_rate": 0.01, "loss": 2.059, "step": 19713 }, { "epoch": 2.026518655565834, "grad_norm": 0.10866537690162659, "learning_rate": 0.01, "loss": 2.0505, "step": 19716 }, { "epoch": 2.026827012025902, "grad_norm": 0.07504774630069733, "learning_rate": 0.01, "loss": 2.0318, "step": 19719 }, { "epoch": 2.0271353684859696, "grad_norm": 0.07938455790281296, "learning_rate": 0.01, "loss": 2.0643, "step": 19722 }, { "epoch": 2.0274437249460378, "grad_norm": 0.10090157389640808, "learning_rate": 0.01, "loss": 2.0381, "step": 19725 }, { "epoch": 2.0277520814061054, "grad_norm": 0.04133126139640808, "learning_rate": 0.01, "loss": 2.0126, "step": 19728 }, { "epoch": 2.0280604378661735, "grad_norm": 0.12022694200277328, "learning_rate": 0.01, "loss": 2.0463, "step": 19731 }, { "epoch": 2.028368794326241, "grad_norm": 0.05904841795563698, "learning_rate": 0.01, "loss": 2.0547, "step": 19734 }, { "epoch": 2.028677150786309, "grad_norm": 0.08344896882772446, "learning_rate": 0.01, "loss": 2.0721, "step": 19737 }, { "epoch": 2.028985507246377, "grad_norm": 0.045264534652233124, "learning_rate": 0.01, "loss": 2.0597, "step": 19740 }, { "epoch": 2.0292938637064446, "grad_norm": 0.05907116085290909, "learning_rate": 0.01, "loss": 2.0561, "step": 19743 }, { "epoch": 2.0296022201665127, "grad_norm": 0.04975851625204086, "learning_rate": 0.01, "loss": 2.0642, "step": 19746 }, { "epoch": 2.0299105766265804, "grad_norm": 0.08190937340259552, "learning_rate": 0.01, "loss": 2.0429, "step": 19749 }, { "epoch": 2.030218933086648, "grad_norm": 0.14594541490077972, "learning_rate": 0.01, "loss": 2.0438, "step": 19752 }, { "epoch": 2.030527289546716, "grad_norm": 0.11920581012964249, "learning_rate": 0.01, "loss": 2.0549, "step": 19755 }, { "epoch": 2.030835646006784, "grad_norm": 0.04334663227200508, "learning_rate": 0.01, "loss": 2.0835, "step": 19758 }, { "epoch": 2.031144002466852, "grad_norm": 0.05323721095919609, "learning_rate": 0.01, "loss": 2.0546, "step": 19761 }, { "epoch": 2.0314523589269196, "grad_norm": 0.09565315395593643, "learning_rate": 0.01, "loss": 2.0622, "step": 19764 }, { "epoch": 2.031760715386987, "grad_norm": 0.14498768746852875, "learning_rate": 0.01, "loss": 2.0336, "step": 19767 }, { "epoch": 2.0320690718470553, "grad_norm": 0.16146855056285858, "learning_rate": 0.01, "loss": 2.0444, "step": 19770 }, { "epoch": 2.032377428307123, "grad_norm": 0.09023015946149826, "learning_rate": 0.01, "loss": 2.0341, "step": 19773 }, { "epoch": 2.032685784767191, "grad_norm": 0.05290327966213226, "learning_rate": 0.01, "loss": 2.0442, "step": 19776 }, { "epoch": 2.0329941412272587, "grad_norm": 0.06551158428192139, "learning_rate": 0.01, "loss": 2.062, "step": 19779 }, { "epoch": 2.0333024976873264, "grad_norm": 0.09268030524253845, "learning_rate": 0.01, "loss": 2.0668, "step": 19782 }, { "epoch": 2.0336108541473945, "grad_norm": 0.05402594432234764, "learning_rate": 0.01, "loss": 2.0739, "step": 19785 }, { "epoch": 2.033919210607462, "grad_norm": 0.052478570491075516, "learning_rate": 0.01, "loss": 2.0709, "step": 19788 }, { "epoch": 2.0342275670675303, "grad_norm": 0.03243448957800865, "learning_rate": 0.01, "loss": 2.049, "step": 19791 }, { "epoch": 2.034535923527598, "grad_norm": 0.08627558499574661, "learning_rate": 0.01, "loss": 2.058, "step": 19794 }, { "epoch": 2.0348442799876656, "grad_norm": 0.04757314547896385, "learning_rate": 0.01, "loss": 2.0637, "step": 19797 }, { "epoch": 2.0351526364477337, "grad_norm": 0.11217369884252548, "learning_rate": 0.01, "loss": 2.0581, "step": 19800 }, { "epoch": 2.0354609929078014, "grad_norm": 0.07525690644979477, "learning_rate": 0.01, "loss": 2.0782, "step": 19803 }, { "epoch": 2.0357693493678695, "grad_norm": 0.0945955365896225, "learning_rate": 0.01, "loss": 2.0594, "step": 19806 }, { "epoch": 2.036077705827937, "grad_norm": 0.07789472490549088, "learning_rate": 0.01, "loss": 2.0444, "step": 19809 }, { "epoch": 2.036386062288005, "grad_norm": 0.06672658026218414, "learning_rate": 0.01, "loss": 2.0392, "step": 19812 }, { "epoch": 2.036694418748073, "grad_norm": 0.06361529976129532, "learning_rate": 0.01, "loss": 2.0504, "step": 19815 }, { "epoch": 2.0370027752081405, "grad_norm": 0.03530391305685043, "learning_rate": 0.01, "loss": 2.0453, "step": 19818 }, { "epoch": 2.0373111316682087, "grad_norm": 0.08201812207698822, "learning_rate": 0.01, "loss": 2.023, "step": 19821 }, { "epoch": 2.0376194881282763, "grad_norm": 0.09198293834924698, "learning_rate": 0.01, "loss": 2.0422, "step": 19824 }, { "epoch": 2.037927844588344, "grad_norm": 0.058875374495983124, "learning_rate": 0.01, "loss": 2.0484, "step": 19827 }, { "epoch": 2.038236201048412, "grad_norm": 0.04453382268548012, "learning_rate": 0.01, "loss": 2.0322, "step": 19830 }, { "epoch": 2.0385445575084797, "grad_norm": 0.03713817149400711, "learning_rate": 0.01, "loss": 2.0151, "step": 19833 }, { "epoch": 2.038852913968548, "grad_norm": 0.056827936321496964, "learning_rate": 0.01, "loss": 2.0532, "step": 19836 }, { "epoch": 2.0391612704286155, "grad_norm": 0.08166830986738205, "learning_rate": 0.01, "loss": 2.0602, "step": 19839 }, { "epoch": 2.039469626888683, "grad_norm": 0.06837287545204163, "learning_rate": 0.01, "loss": 2.0567, "step": 19842 }, { "epoch": 2.0397779833487513, "grad_norm": 0.08867949992418289, "learning_rate": 0.01, "loss": 2.0559, "step": 19845 }, { "epoch": 2.040086339808819, "grad_norm": 0.07119370251893997, "learning_rate": 0.01, "loss": 2.0357, "step": 19848 }, { "epoch": 2.040394696268887, "grad_norm": 0.07701986283063889, "learning_rate": 0.01, "loss": 2.0695, "step": 19851 }, { "epoch": 2.0407030527289547, "grad_norm": 0.04700729623436928, "learning_rate": 0.01, "loss": 2.0266, "step": 19854 }, { "epoch": 2.0410114091890224, "grad_norm": 0.05898338556289673, "learning_rate": 0.01, "loss": 2.0519, "step": 19857 }, { "epoch": 2.0413197656490905, "grad_norm": 0.11953815072774887, "learning_rate": 0.01, "loss": 2.0487, "step": 19860 }, { "epoch": 2.041628122109158, "grad_norm": 0.09704854339361191, "learning_rate": 0.01, "loss": 2.0511, "step": 19863 }, { "epoch": 2.041936478569226, "grad_norm": 0.1362537145614624, "learning_rate": 0.01, "loss": 2.0354, "step": 19866 }, { "epoch": 2.042244835029294, "grad_norm": 0.09366025030612946, "learning_rate": 0.01, "loss": 2.0628, "step": 19869 }, { "epoch": 2.0425531914893615, "grad_norm": 0.05397522822022438, "learning_rate": 0.01, "loss": 2.0646, "step": 19872 }, { "epoch": 2.0428615479494296, "grad_norm": 0.07723390311002731, "learning_rate": 0.01, "loss": 2.0523, "step": 19875 }, { "epoch": 2.0431699044094973, "grad_norm": 0.08418615907430649, "learning_rate": 0.01, "loss": 2.0878, "step": 19878 }, { "epoch": 2.0434782608695654, "grad_norm": 0.06149798631668091, "learning_rate": 0.01, "loss": 2.0547, "step": 19881 }, { "epoch": 2.043786617329633, "grad_norm": 0.0474097803235054, "learning_rate": 0.01, "loss": 2.0539, "step": 19884 }, { "epoch": 2.0440949737897007, "grad_norm": 0.04854200407862663, "learning_rate": 0.01, "loss": 2.0637, "step": 19887 }, { "epoch": 2.044403330249769, "grad_norm": 0.04509511590003967, "learning_rate": 0.01, "loss": 2.0679, "step": 19890 }, { "epoch": 2.0447116867098365, "grad_norm": 0.05422825738787651, "learning_rate": 0.01, "loss": 2.0449, "step": 19893 }, { "epoch": 2.0450200431699046, "grad_norm": 0.06556607037782669, "learning_rate": 0.01, "loss": 2.0429, "step": 19896 }, { "epoch": 2.0453283996299723, "grad_norm": 0.03906751424074173, "learning_rate": 0.01, "loss": 2.0694, "step": 19899 }, { "epoch": 2.04563675609004, "grad_norm": 0.05207069590687752, "learning_rate": 0.01, "loss": 2.0363, "step": 19902 }, { "epoch": 2.045945112550108, "grad_norm": 0.04187217727303505, "learning_rate": 0.01, "loss": 2.0413, "step": 19905 }, { "epoch": 2.0462534690101757, "grad_norm": 0.04163263365626335, "learning_rate": 0.01, "loss": 2.0535, "step": 19908 }, { "epoch": 2.046561825470244, "grad_norm": 0.037544943392276764, "learning_rate": 0.01, "loss": 2.033, "step": 19911 }, { "epoch": 2.0468701819303114, "grad_norm": 0.03623516857624054, "learning_rate": 0.01, "loss": 2.0758, "step": 19914 }, { "epoch": 2.047178538390379, "grad_norm": 0.08026546239852905, "learning_rate": 0.01, "loss": 2.0654, "step": 19917 }, { "epoch": 2.047486894850447, "grad_norm": 0.05316372588276863, "learning_rate": 0.01, "loss": 2.0751, "step": 19920 }, { "epoch": 2.047795251310515, "grad_norm": 0.062127552926540375, "learning_rate": 0.01, "loss": 2.0458, "step": 19923 }, { "epoch": 2.048103607770583, "grad_norm": 0.049675267189741135, "learning_rate": 0.01, "loss": 2.0741, "step": 19926 }, { "epoch": 2.0484119642306506, "grad_norm": 0.0425347164273262, "learning_rate": 0.01, "loss": 2.0573, "step": 19929 }, { "epoch": 2.0487203206907183, "grad_norm": 0.03532329574227333, "learning_rate": 0.01, "loss": 2.0367, "step": 19932 }, { "epoch": 2.0490286771507864, "grad_norm": 0.05779660493135452, "learning_rate": 0.01, "loss": 2.0801, "step": 19935 }, { "epoch": 2.049337033610854, "grad_norm": 0.07841507345438004, "learning_rate": 0.01, "loss": 2.0603, "step": 19938 }, { "epoch": 2.049645390070922, "grad_norm": 0.0883709266781807, "learning_rate": 0.01, "loss": 2.0594, "step": 19941 }, { "epoch": 2.04995374653099, "grad_norm": 0.09949532151222229, "learning_rate": 0.01, "loss": 2.0422, "step": 19944 }, { "epoch": 2.0502621029910575, "grad_norm": 0.04350358247756958, "learning_rate": 0.01, "loss": 2.0439, "step": 19947 }, { "epoch": 2.0505704594511256, "grad_norm": 0.042655814439058304, "learning_rate": 0.01, "loss": 2.0821, "step": 19950 }, { "epoch": 2.0508788159111933, "grad_norm": 0.060070816427469254, "learning_rate": 0.01, "loss": 2.0495, "step": 19953 }, { "epoch": 2.0511871723712614, "grad_norm": 0.06479921191930771, "learning_rate": 0.01, "loss": 2.0783, "step": 19956 }, { "epoch": 2.051495528831329, "grad_norm": 0.0982329398393631, "learning_rate": 0.01, "loss": 2.0756, "step": 19959 }, { "epoch": 2.0518038852913967, "grad_norm": 0.10483184456825256, "learning_rate": 0.01, "loss": 2.035, "step": 19962 }, { "epoch": 2.052112241751465, "grad_norm": 0.06383049488067627, "learning_rate": 0.01, "loss": 2.0252, "step": 19965 }, { "epoch": 2.0524205982115324, "grad_norm": 0.13797828555107117, "learning_rate": 0.01, "loss": 2.0604, "step": 19968 }, { "epoch": 2.0527289546716005, "grad_norm": 0.037840090692043304, "learning_rate": 0.01, "loss": 2.0604, "step": 19971 }, { "epoch": 2.053037311131668, "grad_norm": 0.043872520327568054, "learning_rate": 0.01, "loss": 2.0609, "step": 19974 }, { "epoch": 2.053345667591736, "grad_norm": 0.03223152458667755, "learning_rate": 0.01, "loss": 2.0198, "step": 19977 }, { "epoch": 2.053654024051804, "grad_norm": 0.05935351178050041, "learning_rate": 0.01, "loss": 2.066, "step": 19980 }, { "epoch": 2.0539623805118716, "grad_norm": 0.054079607129096985, "learning_rate": 0.01, "loss": 2.0665, "step": 19983 }, { "epoch": 2.0542707369719397, "grad_norm": 0.04307890310883522, "learning_rate": 0.01, "loss": 2.0145, "step": 19986 }, { "epoch": 2.0545790934320074, "grad_norm": 0.06624720245599747, "learning_rate": 0.01, "loss": 2.052, "step": 19989 }, { "epoch": 2.054887449892075, "grad_norm": 0.08096028864383698, "learning_rate": 0.01, "loss": 2.0947, "step": 19992 }, { "epoch": 2.055195806352143, "grad_norm": 0.0872364342212677, "learning_rate": 0.01, "loss": 2.0675, "step": 19995 }, { "epoch": 2.055504162812211, "grad_norm": 0.04538879171013832, "learning_rate": 0.01, "loss": 2.031, "step": 19998 }, { "epoch": 2.055812519272279, "grad_norm": 0.11873256415128708, "learning_rate": 0.01, "loss": 2.0682, "step": 20001 }, { "epoch": 2.0561208757323466, "grad_norm": 0.05929452180862427, "learning_rate": 0.01, "loss": 2.0602, "step": 20004 }, { "epoch": 2.0564292321924142, "grad_norm": 0.05131294205784798, "learning_rate": 0.01, "loss": 2.0569, "step": 20007 }, { "epoch": 2.0567375886524824, "grad_norm": 0.05690256133675575, "learning_rate": 0.01, "loss": 2.0779, "step": 20010 }, { "epoch": 2.05704594511255, "grad_norm": 0.04414551705121994, "learning_rate": 0.01, "loss": 2.0168, "step": 20013 }, { "epoch": 2.057354301572618, "grad_norm": 0.04017036780714989, "learning_rate": 0.01, "loss": 2.0349, "step": 20016 }, { "epoch": 2.0576626580326858, "grad_norm": 0.06785457581281662, "learning_rate": 0.01, "loss": 2.058, "step": 20019 }, { "epoch": 2.0579710144927534, "grad_norm": 0.06258828938007355, "learning_rate": 0.01, "loss": 2.0484, "step": 20022 }, { "epoch": 2.0582793709528215, "grad_norm": 0.11196446418762207, "learning_rate": 0.01, "loss": 2.0624, "step": 20025 }, { "epoch": 2.058587727412889, "grad_norm": 0.08678428828716278, "learning_rate": 0.01, "loss": 2.0667, "step": 20028 }, { "epoch": 2.0588960838729573, "grad_norm": 0.13598018884658813, "learning_rate": 0.01, "loss": 2.0239, "step": 20031 }, { "epoch": 2.059204440333025, "grad_norm": 0.06666143238544464, "learning_rate": 0.01, "loss": 2.037, "step": 20034 }, { "epoch": 2.0595127967930926, "grad_norm": 0.05994727462530136, "learning_rate": 0.01, "loss": 2.0674, "step": 20037 }, { "epoch": 2.0598211532531607, "grad_norm": 0.03867008537054062, "learning_rate": 0.01, "loss": 2.056, "step": 20040 }, { "epoch": 2.0601295097132284, "grad_norm": 0.13077500462532043, "learning_rate": 0.01, "loss": 2.0345, "step": 20043 }, { "epoch": 2.0604378661732965, "grad_norm": 0.057164691388607025, "learning_rate": 0.01, "loss": 2.0436, "step": 20046 }, { "epoch": 2.060746222633364, "grad_norm": 0.07206998765468597, "learning_rate": 0.01, "loss": 2.0472, "step": 20049 }, { "epoch": 2.061054579093432, "grad_norm": 0.08844766765832901, "learning_rate": 0.01, "loss": 2.0535, "step": 20052 }, { "epoch": 2.0613629355535, "grad_norm": 0.07533573359251022, "learning_rate": 0.01, "loss": 2.0698, "step": 20055 }, { "epoch": 2.0616712920135676, "grad_norm": 0.03260966017842293, "learning_rate": 0.01, "loss": 2.0494, "step": 20058 }, { "epoch": 2.0619796484736357, "grad_norm": 0.03491971641778946, "learning_rate": 0.01, "loss": 2.0612, "step": 20061 }, { "epoch": 2.0622880049337033, "grad_norm": 0.04022398218512535, "learning_rate": 0.01, "loss": 2.0343, "step": 20064 }, { "epoch": 2.062596361393771, "grad_norm": 0.06325655430555344, "learning_rate": 0.01, "loss": 2.0409, "step": 20067 }, { "epoch": 2.062904717853839, "grad_norm": 0.06704667955636978, "learning_rate": 0.01, "loss": 2.0641, "step": 20070 }, { "epoch": 2.0632130743139068, "grad_norm": 0.06883389502763748, "learning_rate": 0.01, "loss": 2.0462, "step": 20073 }, { "epoch": 2.063521430773975, "grad_norm": 0.05242495611310005, "learning_rate": 0.01, "loss": 2.0701, "step": 20076 }, { "epoch": 2.0638297872340425, "grad_norm": 0.06587128341197968, "learning_rate": 0.01, "loss": 2.0443, "step": 20079 }, { "epoch": 2.06413814369411, "grad_norm": 0.03571178764104843, "learning_rate": 0.01, "loss": 2.0519, "step": 20082 }, { "epoch": 2.0644465001541783, "grad_norm": 0.061605412513017654, "learning_rate": 0.01, "loss": 2.0357, "step": 20085 }, { "epoch": 2.064754856614246, "grad_norm": 0.05552279204130173, "learning_rate": 0.01, "loss": 2.0258, "step": 20088 }, { "epoch": 2.065063213074314, "grad_norm": 0.047950152307748795, "learning_rate": 0.01, "loss": 2.0354, "step": 20091 }, { "epoch": 2.0653715695343817, "grad_norm": 0.09466604888439178, "learning_rate": 0.01, "loss": 2.0523, "step": 20094 }, { "epoch": 2.0656799259944494, "grad_norm": 0.04828859865665436, "learning_rate": 0.01, "loss": 2.0466, "step": 20097 }, { "epoch": 2.0659882824545175, "grad_norm": 0.03933820128440857, "learning_rate": 0.01, "loss": 2.0458, "step": 20100 }, { "epoch": 2.066296638914585, "grad_norm": 0.05044875666499138, "learning_rate": 0.01, "loss": 2.0396, "step": 20103 }, { "epoch": 2.0666049953746533, "grad_norm": 0.04152398556470871, "learning_rate": 0.01, "loss": 2.0269, "step": 20106 }, { "epoch": 2.066913351834721, "grad_norm": 0.10098916292190552, "learning_rate": 0.01, "loss": 2.0726, "step": 20109 }, { "epoch": 2.0672217082947886, "grad_norm": 0.06381060183048248, "learning_rate": 0.01, "loss": 2.0276, "step": 20112 }, { "epoch": 2.0675300647548567, "grad_norm": 0.13991308212280273, "learning_rate": 0.01, "loss": 2.0508, "step": 20115 }, { "epoch": 2.0678384212149243, "grad_norm": 0.061171598732471466, "learning_rate": 0.01, "loss": 2.0232, "step": 20118 }, { "epoch": 2.0681467776749924, "grad_norm": 0.04276692867279053, "learning_rate": 0.01, "loss": 2.0667, "step": 20121 }, { "epoch": 2.06845513413506, "grad_norm": 0.03582247719168663, "learning_rate": 0.01, "loss": 2.0511, "step": 20124 }, { "epoch": 2.0687634905951278, "grad_norm": 0.037077244371175766, "learning_rate": 0.01, "loss": 2.0275, "step": 20127 }, { "epoch": 2.069071847055196, "grad_norm": 0.11291185766458511, "learning_rate": 0.01, "loss": 2.0207, "step": 20130 }, { "epoch": 2.0693802035152635, "grad_norm": 0.06811921298503876, "learning_rate": 0.01, "loss": 2.0437, "step": 20133 }, { "epoch": 2.0696885599753316, "grad_norm": 0.049292147159576416, "learning_rate": 0.01, "loss": 2.0327, "step": 20136 }, { "epoch": 2.0699969164353993, "grad_norm": 0.08937390893697739, "learning_rate": 0.01, "loss": 2.0548, "step": 20139 }, { "epoch": 2.070305272895467, "grad_norm": 0.04353107511997223, "learning_rate": 0.01, "loss": 2.0522, "step": 20142 }, { "epoch": 2.070613629355535, "grad_norm": 0.03737090900540352, "learning_rate": 0.01, "loss": 2.0525, "step": 20145 }, { "epoch": 2.0709219858156027, "grad_norm": 0.038217127323150635, "learning_rate": 0.01, "loss": 2.0285, "step": 20148 }, { "epoch": 2.071230342275671, "grad_norm": 0.07162989675998688, "learning_rate": 0.01, "loss": 2.0556, "step": 20151 }, { "epoch": 2.0715386987357385, "grad_norm": 0.06507647782564163, "learning_rate": 0.01, "loss": 2.0348, "step": 20154 }, { "epoch": 2.071847055195806, "grad_norm": 0.07880635559558868, "learning_rate": 0.01, "loss": 2.0599, "step": 20157 }, { "epoch": 2.0721554116558742, "grad_norm": 0.11247913539409637, "learning_rate": 0.01, "loss": 2.042, "step": 20160 }, { "epoch": 2.072463768115942, "grad_norm": 0.04084709286689758, "learning_rate": 0.01, "loss": 2.0369, "step": 20163 }, { "epoch": 2.07277212457601, "grad_norm": 0.06349261850118637, "learning_rate": 0.01, "loss": 2.0656, "step": 20166 }, { "epoch": 2.0730804810360777, "grad_norm": 0.03916813060641289, "learning_rate": 0.01, "loss": 2.0142, "step": 20169 }, { "epoch": 2.0733888374961453, "grad_norm": 0.041867464780807495, "learning_rate": 0.01, "loss": 2.0613, "step": 20172 }, { "epoch": 2.0736971939562134, "grad_norm": 0.09670063108205795, "learning_rate": 0.01, "loss": 2.0254, "step": 20175 }, { "epoch": 2.074005550416281, "grad_norm": 0.05259916931390762, "learning_rate": 0.01, "loss": 2.059, "step": 20178 }, { "epoch": 2.074313906876349, "grad_norm": 0.0970730185508728, "learning_rate": 0.01, "loss": 2.0557, "step": 20181 }, { "epoch": 2.074622263336417, "grad_norm": 0.1317344307899475, "learning_rate": 0.01, "loss": 2.0225, "step": 20184 }, { "epoch": 2.0749306197964845, "grad_norm": 0.0787033885717392, "learning_rate": 0.01, "loss": 2.072, "step": 20187 }, { "epoch": 2.0752389762565526, "grad_norm": 0.04037567600607872, "learning_rate": 0.01, "loss": 2.0422, "step": 20190 }, { "epoch": 2.0755473327166203, "grad_norm": 0.03588324785232544, "learning_rate": 0.01, "loss": 2.0396, "step": 20193 }, { "epoch": 2.0758556891766884, "grad_norm": 0.05277855321764946, "learning_rate": 0.01, "loss": 2.0702, "step": 20196 }, { "epoch": 2.076164045636756, "grad_norm": 0.050833381712436676, "learning_rate": 0.01, "loss": 2.0748, "step": 20199 }, { "epoch": 2.076472402096824, "grad_norm": 0.089606374502182, "learning_rate": 0.01, "loss": 2.0482, "step": 20202 }, { "epoch": 2.076780758556892, "grad_norm": 0.05270789936184883, "learning_rate": 0.01, "loss": 2.0555, "step": 20205 }, { "epoch": 2.0770891150169595, "grad_norm": 0.06895376741886139, "learning_rate": 0.01, "loss": 2.0509, "step": 20208 }, { "epoch": 2.0773974714770276, "grad_norm": 0.11967889964580536, "learning_rate": 0.01, "loss": 2.0512, "step": 20211 }, { "epoch": 2.0777058279370952, "grad_norm": 0.11328759789466858, "learning_rate": 0.01, "loss": 2.002, "step": 20214 }, { "epoch": 2.078014184397163, "grad_norm": 0.045189183205366135, "learning_rate": 0.01, "loss": 2.0211, "step": 20217 }, { "epoch": 2.078322540857231, "grad_norm": 0.05716565251350403, "learning_rate": 0.01, "loss": 2.0413, "step": 20220 }, { "epoch": 2.0786308973172987, "grad_norm": 0.07340056449174881, "learning_rate": 0.01, "loss": 2.0457, "step": 20223 }, { "epoch": 2.0789392537773668, "grad_norm": 0.05395069718360901, "learning_rate": 0.01, "loss": 2.0582, "step": 20226 }, { "epoch": 2.0792476102374344, "grad_norm": 0.03723681718111038, "learning_rate": 0.01, "loss": 2.0581, "step": 20229 }, { "epoch": 2.0795559666975025, "grad_norm": 0.10626024752855301, "learning_rate": 0.01, "loss": 2.0351, "step": 20232 }, { "epoch": 2.07986432315757, "grad_norm": 0.09987606853246689, "learning_rate": 0.01, "loss": 2.067, "step": 20235 }, { "epoch": 2.080172679617638, "grad_norm": 0.07282060384750366, "learning_rate": 0.01, "loss": 2.0511, "step": 20238 }, { "epoch": 2.080481036077706, "grad_norm": 0.04192940518260002, "learning_rate": 0.01, "loss": 2.0519, "step": 20241 }, { "epoch": 2.0807893925377736, "grad_norm": 0.06585846096277237, "learning_rate": 0.01, "loss": 2.0282, "step": 20244 }, { "epoch": 2.0810977489978413, "grad_norm": 0.04427814856171608, "learning_rate": 0.01, "loss": 2.017, "step": 20247 }, { "epoch": 2.0814061054579094, "grad_norm": 0.05114896968007088, "learning_rate": 0.01, "loss": 2.059, "step": 20250 }, { "epoch": 2.081714461917977, "grad_norm": 0.0445995107293129, "learning_rate": 0.01, "loss": 2.0544, "step": 20253 }, { "epoch": 2.082022818378045, "grad_norm": 0.04904405400156975, "learning_rate": 0.01, "loss": 2.0428, "step": 20256 }, { "epoch": 2.082331174838113, "grad_norm": 0.03620357811450958, "learning_rate": 0.01, "loss": 2.0493, "step": 20259 }, { "epoch": 2.082639531298181, "grad_norm": 0.10994633287191391, "learning_rate": 0.01, "loss": 2.0719, "step": 20262 }, { "epoch": 2.0829478877582486, "grad_norm": 0.05244474112987518, "learning_rate": 0.01, "loss": 2.0559, "step": 20265 }, { "epoch": 2.0832562442183162, "grad_norm": 0.05937792733311653, "learning_rate": 0.01, "loss": 2.0593, "step": 20268 }, { "epoch": 2.0835646006783843, "grad_norm": 0.08669353276491165, "learning_rate": 0.01, "loss": 2.0539, "step": 20271 }, { "epoch": 2.083872957138452, "grad_norm": 0.054145876318216324, "learning_rate": 0.01, "loss": 2.0281, "step": 20274 }, { "epoch": 2.08418131359852, "grad_norm": 0.040682870894670486, "learning_rate": 0.01, "loss": 2.0657, "step": 20277 }, { "epoch": 2.0844896700585878, "grad_norm": 0.04110307991504669, "learning_rate": 0.01, "loss": 2.053, "step": 20280 }, { "epoch": 2.0847980265186554, "grad_norm": 0.13420680165290833, "learning_rate": 0.01, "loss": 2.0568, "step": 20283 }, { "epoch": 2.0851063829787235, "grad_norm": 0.049191731959581375, "learning_rate": 0.01, "loss": 2.0498, "step": 20286 }, { "epoch": 2.085414739438791, "grad_norm": 0.04682133346796036, "learning_rate": 0.01, "loss": 2.0532, "step": 20289 }, { "epoch": 2.0857230958988593, "grad_norm": 0.043646588921546936, "learning_rate": 0.01, "loss": 2.0582, "step": 20292 }, { "epoch": 2.086031452358927, "grad_norm": 0.05107354745268822, "learning_rate": 0.01, "loss": 2.0574, "step": 20295 }, { "epoch": 2.0863398088189946, "grad_norm": 0.06274458020925522, "learning_rate": 0.01, "loss": 2.0621, "step": 20298 }, { "epoch": 2.0866481652790627, "grad_norm": 0.11294244229793549, "learning_rate": 0.01, "loss": 2.0685, "step": 20301 }, { "epoch": 2.0869565217391304, "grad_norm": 0.04948057234287262, "learning_rate": 0.01, "loss": 2.0469, "step": 20304 }, { "epoch": 2.0872648781991985, "grad_norm": 0.04451402649283409, "learning_rate": 0.01, "loss": 2.0675, "step": 20307 }, { "epoch": 2.087573234659266, "grad_norm": 0.04940638318657875, "learning_rate": 0.01, "loss": 2.0584, "step": 20310 }, { "epoch": 2.087881591119334, "grad_norm": 0.06530692428350449, "learning_rate": 0.01, "loss": 2.0541, "step": 20313 }, { "epoch": 2.088189947579402, "grad_norm": 0.13395404815673828, "learning_rate": 0.01, "loss": 2.028, "step": 20316 }, { "epoch": 2.0884983040394696, "grad_norm": 0.09222474694252014, "learning_rate": 0.01, "loss": 2.045, "step": 20319 }, { "epoch": 2.0888066604995377, "grad_norm": 0.06510595977306366, "learning_rate": 0.01, "loss": 2.0291, "step": 20322 }, { "epoch": 2.0891150169596053, "grad_norm": 0.055552888661623, "learning_rate": 0.01, "loss": 2.0612, "step": 20325 }, { "epoch": 2.089423373419673, "grad_norm": 0.04411375895142555, "learning_rate": 0.01, "loss": 2.0368, "step": 20328 }, { "epoch": 2.089731729879741, "grad_norm": 0.05151544511318207, "learning_rate": 0.01, "loss": 2.0539, "step": 20331 }, { "epoch": 2.0900400863398088, "grad_norm": 0.09836700558662415, "learning_rate": 0.01, "loss": 2.0353, "step": 20334 }, { "epoch": 2.090348442799877, "grad_norm": 0.06430090218782425, "learning_rate": 0.01, "loss": 2.043, "step": 20337 }, { "epoch": 2.0906567992599445, "grad_norm": 0.09683403372764587, "learning_rate": 0.01, "loss": 2.0459, "step": 20340 }, { "epoch": 2.090965155720012, "grad_norm": 0.08345566689968109, "learning_rate": 0.01, "loss": 2.0543, "step": 20343 }, { "epoch": 2.0912735121800803, "grad_norm": 0.045199088752269745, "learning_rate": 0.01, "loss": 2.0509, "step": 20346 }, { "epoch": 2.091581868640148, "grad_norm": 0.0399625338613987, "learning_rate": 0.01, "loss": 2.0256, "step": 20349 }, { "epoch": 2.091890225100216, "grad_norm": 0.03815968707203865, "learning_rate": 0.01, "loss": 2.0196, "step": 20352 }, { "epoch": 2.0921985815602837, "grad_norm": 0.054826896637678146, "learning_rate": 0.01, "loss": 2.035, "step": 20355 }, { "epoch": 2.0925069380203514, "grad_norm": 0.05717878043651581, "learning_rate": 0.01, "loss": 2.0613, "step": 20358 }, { "epoch": 2.0928152944804195, "grad_norm": 0.09222474694252014, "learning_rate": 0.01, "loss": 2.037, "step": 20361 }, { "epoch": 2.093123650940487, "grad_norm": 0.0983637347817421, "learning_rate": 0.01, "loss": 2.0652, "step": 20364 }, { "epoch": 2.0934320074005552, "grad_norm": 0.05338272079825401, "learning_rate": 0.01, "loss": 2.0448, "step": 20367 }, { "epoch": 2.093740363860623, "grad_norm": 0.047821927815675735, "learning_rate": 0.01, "loss": 2.0299, "step": 20370 }, { "epoch": 2.0940487203206906, "grad_norm": 0.10657365620136261, "learning_rate": 0.01, "loss": 2.0533, "step": 20373 }, { "epoch": 2.0943570767807587, "grad_norm": 0.08650174736976624, "learning_rate": 0.01, "loss": 2.0486, "step": 20376 }, { "epoch": 2.0946654332408263, "grad_norm": 0.06975332647562027, "learning_rate": 0.01, "loss": 2.0365, "step": 20379 }, { "epoch": 2.0949737897008944, "grad_norm": 0.08443387597799301, "learning_rate": 0.01, "loss": 2.0529, "step": 20382 }, { "epoch": 2.095282146160962, "grad_norm": 0.04376668483018875, "learning_rate": 0.01, "loss": 2.0238, "step": 20385 }, { "epoch": 2.0955905026210297, "grad_norm": 0.1501801759004593, "learning_rate": 0.01, "loss": 2.0461, "step": 20388 }, { "epoch": 2.095898859081098, "grad_norm": 0.08488426357507706, "learning_rate": 0.01, "loss": 2.0391, "step": 20391 }, { "epoch": 2.0962072155411655, "grad_norm": 0.05569930747151375, "learning_rate": 0.01, "loss": 2.0375, "step": 20394 }, { "epoch": 2.0965155720012336, "grad_norm": 0.059826262295246124, "learning_rate": 0.01, "loss": 2.0303, "step": 20397 }, { "epoch": 2.0968239284613013, "grad_norm": 0.0911981388926506, "learning_rate": 0.01, "loss": 2.0568, "step": 20400 }, { "epoch": 2.097132284921369, "grad_norm": 0.03921716660261154, "learning_rate": 0.01, "loss": 2.0296, "step": 20403 }, { "epoch": 2.097440641381437, "grad_norm": 0.07355164736509323, "learning_rate": 0.01, "loss": 2.0397, "step": 20406 }, { "epoch": 2.0977489978415047, "grad_norm": 0.031198322772979736, "learning_rate": 0.01, "loss": 2.0755, "step": 20409 }, { "epoch": 2.098057354301573, "grad_norm": 0.07405471056699753, "learning_rate": 0.01, "loss": 2.0174, "step": 20412 }, { "epoch": 2.0983657107616405, "grad_norm": 0.08160628378391266, "learning_rate": 0.01, "loss": 2.0509, "step": 20415 }, { "epoch": 2.098674067221708, "grad_norm": 0.11125149577856064, "learning_rate": 0.01, "loss": 2.0612, "step": 20418 }, { "epoch": 2.0989824236817762, "grad_norm": 0.04484894871711731, "learning_rate": 0.01, "loss": 2.061, "step": 20421 }, { "epoch": 2.099290780141844, "grad_norm": 0.07138056308031082, "learning_rate": 0.01, "loss": 2.0469, "step": 20424 }, { "epoch": 2.099599136601912, "grad_norm": 0.0794389471411705, "learning_rate": 0.01, "loss": 2.0423, "step": 20427 }, { "epoch": 2.0999074930619797, "grad_norm": 0.05673963576555252, "learning_rate": 0.01, "loss": 2.0708, "step": 20430 }, { "epoch": 2.1002158495220473, "grad_norm": 0.054527074098587036, "learning_rate": 0.01, "loss": 2.0304, "step": 20433 }, { "epoch": 2.1005242059821154, "grad_norm": 0.04964460805058479, "learning_rate": 0.01, "loss": 2.0302, "step": 20436 }, { "epoch": 2.100832562442183, "grad_norm": 0.07147829979658127, "learning_rate": 0.01, "loss": 2.0365, "step": 20439 }, { "epoch": 2.101140918902251, "grad_norm": 0.07474019378423691, "learning_rate": 0.01, "loss": 2.0559, "step": 20442 }, { "epoch": 2.101449275362319, "grad_norm": 0.06944689154624939, "learning_rate": 0.01, "loss": 2.0477, "step": 20445 }, { "epoch": 2.1017576318223865, "grad_norm": 0.07336383312940598, "learning_rate": 0.01, "loss": 2.0386, "step": 20448 }, { "epoch": 2.1020659882824546, "grad_norm": 0.08889491856098175, "learning_rate": 0.01, "loss": 2.0822, "step": 20451 }, { "epoch": 2.1023743447425223, "grad_norm": 0.07154878228902817, "learning_rate": 0.01, "loss": 2.0672, "step": 20454 }, { "epoch": 2.1026827012025904, "grad_norm": 0.0471792072057724, "learning_rate": 0.01, "loss": 2.026, "step": 20457 }, { "epoch": 2.102991057662658, "grad_norm": 0.10836900025606155, "learning_rate": 0.01, "loss": 2.037, "step": 20460 }, { "epoch": 2.1032994141227257, "grad_norm": 0.05292079225182533, "learning_rate": 0.01, "loss": 2.0145, "step": 20463 }, { "epoch": 2.103607770582794, "grad_norm": 0.13195812702178955, "learning_rate": 0.01, "loss": 2.0426, "step": 20466 }, { "epoch": 2.1039161270428615, "grad_norm": 0.1316298395395279, "learning_rate": 0.01, "loss": 2.0179, "step": 20469 }, { "epoch": 2.1042244835029296, "grad_norm": 0.04061713069677353, "learning_rate": 0.01, "loss": 2.0304, "step": 20472 }, { "epoch": 2.1045328399629972, "grad_norm": 0.09250857681035995, "learning_rate": 0.01, "loss": 2.0596, "step": 20475 }, { "epoch": 2.104841196423065, "grad_norm": 0.058364611119031906, "learning_rate": 0.01, "loss": 2.0442, "step": 20478 }, { "epoch": 2.105149552883133, "grad_norm": 0.046974651515483856, "learning_rate": 0.01, "loss": 2.0439, "step": 20481 }, { "epoch": 2.1054579093432007, "grad_norm": 0.04835136979818344, "learning_rate": 0.01, "loss": 2.044, "step": 20484 }, { "epoch": 2.1057662658032688, "grad_norm": 0.04643654450774193, "learning_rate": 0.01, "loss": 2.0276, "step": 20487 }, { "epoch": 2.1060746222633364, "grad_norm": 0.10667752474546432, "learning_rate": 0.01, "loss": 2.0356, "step": 20490 }, { "epoch": 2.106382978723404, "grad_norm": 0.07521391659975052, "learning_rate": 0.01, "loss": 2.0552, "step": 20493 }, { "epoch": 2.106691335183472, "grad_norm": 0.10269229114055634, "learning_rate": 0.01, "loss": 2.0452, "step": 20496 }, { "epoch": 2.10699969164354, "grad_norm": 0.040783487260341644, "learning_rate": 0.01, "loss": 2.0567, "step": 20499 }, { "epoch": 2.107308048103608, "grad_norm": 0.05012373626232147, "learning_rate": 0.01, "loss": 2.0359, "step": 20502 }, { "epoch": 2.1076164045636756, "grad_norm": 0.042675044387578964, "learning_rate": 0.01, "loss": 2.0367, "step": 20505 }, { "epoch": 2.1079247610237433, "grad_norm": 0.06164225935935974, "learning_rate": 0.01, "loss": 2.0324, "step": 20508 }, { "epoch": 2.1082331174838114, "grad_norm": 0.06368019431829453, "learning_rate": 0.01, "loss": 2.0228, "step": 20511 }, { "epoch": 2.108541473943879, "grad_norm": 0.049319278448820114, "learning_rate": 0.01, "loss": 2.0443, "step": 20514 }, { "epoch": 2.108849830403947, "grad_norm": 0.06070362776517868, "learning_rate": 0.01, "loss": 2.045, "step": 20517 }, { "epoch": 2.109158186864015, "grad_norm": 0.07560203224420547, "learning_rate": 0.01, "loss": 2.0469, "step": 20520 }, { "epoch": 2.1094665433240825, "grad_norm": 0.05563758686184883, "learning_rate": 0.01, "loss": 2.0415, "step": 20523 }, { "epoch": 2.1097748997841506, "grad_norm": 0.04349389672279358, "learning_rate": 0.01, "loss": 2.0396, "step": 20526 }, { "epoch": 2.110083256244218, "grad_norm": 0.05849798396229744, "learning_rate": 0.01, "loss": 2.0235, "step": 20529 }, { "epoch": 2.1103916127042863, "grad_norm": 0.06813669949769974, "learning_rate": 0.01, "loss": 2.0541, "step": 20532 }, { "epoch": 2.110699969164354, "grad_norm": 0.08951954543590546, "learning_rate": 0.01, "loss": 2.0232, "step": 20535 }, { "epoch": 2.1110083256244216, "grad_norm": 0.08673957735300064, "learning_rate": 0.01, "loss": 2.0602, "step": 20538 }, { "epoch": 2.1113166820844897, "grad_norm": 0.06135937571525574, "learning_rate": 0.01, "loss": 2.0416, "step": 20541 }, { "epoch": 2.1116250385445574, "grad_norm": 0.08979734778404236, "learning_rate": 0.01, "loss": 2.0504, "step": 20544 }, { "epoch": 2.1119333950046255, "grad_norm": 0.07878229022026062, "learning_rate": 0.01, "loss": 2.0194, "step": 20547 }, { "epoch": 2.112241751464693, "grad_norm": 0.07120572775602341, "learning_rate": 0.01, "loss": 2.0414, "step": 20550 }, { "epoch": 2.112550107924761, "grad_norm": 0.11559943854808807, "learning_rate": 0.01, "loss": 2.0334, "step": 20553 }, { "epoch": 2.112858464384829, "grad_norm": 0.09005344659090042, "learning_rate": 0.01, "loss": 2.0457, "step": 20556 }, { "epoch": 2.1131668208448966, "grad_norm": 0.05143802613019943, "learning_rate": 0.01, "loss": 2.0364, "step": 20559 }, { "epoch": 2.1134751773049647, "grad_norm": 0.03870050981640816, "learning_rate": 0.01, "loss": 2.0216, "step": 20562 }, { "epoch": 2.1137835337650324, "grad_norm": 0.06471268832683563, "learning_rate": 0.01, "loss": 2.0116, "step": 20565 }, { "epoch": 2.1140918902251, "grad_norm": 0.07047493010759354, "learning_rate": 0.01, "loss": 2.052, "step": 20568 }, { "epoch": 2.114400246685168, "grad_norm": 0.08189850300550461, "learning_rate": 0.01, "loss": 2.012, "step": 20571 }, { "epoch": 2.114708603145236, "grad_norm": 0.04041110724210739, "learning_rate": 0.01, "loss": 2.0604, "step": 20574 }, { "epoch": 2.115016959605304, "grad_norm": 0.06573761254549026, "learning_rate": 0.01, "loss": 2.0466, "step": 20577 }, { "epoch": 2.1153253160653716, "grad_norm": 0.06190131977200508, "learning_rate": 0.01, "loss": 2.0475, "step": 20580 }, { "epoch": 2.115633672525439, "grad_norm": 0.044485028833150864, "learning_rate": 0.01, "loss": 2.0336, "step": 20583 }, { "epoch": 2.1159420289855073, "grad_norm": 0.03833581507205963, "learning_rate": 0.01, "loss": 2.0446, "step": 20586 }, { "epoch": 2.116250385445575, "grad_norm": 0.07705090194940567, "learning_rate": 0.01, "loss": 2.0546, "step": 20589 }, { "epoch": 2.116558741905643, "grad_norm": 0.12635158002376556, "learning_rate": 0.01, "loss": 2.0399, "step": 20592 }, { "epoch": 2.1168670983657107, "grad_norm": 0.06076742708683014, "learning_rate": 0.01, "loss": 2.0409, "step": 20595 }, { "epoch": 2.1171754548257784, "grad_norm": 0.06634259968996048, "learning_rate": 0.01, "loss": 2.0603, "step": 20598 }, { "epoch": 2.1174838112858465, "grad_norm": 0.06167810782790184, "learning_rate": 0.01, "loss": 2.0404, "step": 20601 }, { "epoch": 2.117792167745914, "grad_norm": 0.0474831759929657, "learning_rate": 0.01, "loss": 2.0413, "step": 20604 }, { "epoch": 2.1181005242059823, "grad_norm": 0.04799888655543327, "learning_rate": 0.01, "loss": 2.0628, "step": 20607 }, { "epoch": 2.11840888066605, "grad_norm": 0.055144913494586945, "learning_rate": 0.01, "loss": 2.0126, "step": 20610 }, { "epoch": 2.1187172371261176, "grad_norm": 0.1116517037153244, "learning_rate": 0.01, "loss": 2.0467, "step": 20613 }, { "epoch": 2.1190255935861857, "grad_norm": 0.06136411800980568, "learning_rate": 0.01, "loss": 2.0489, "step": 20616 }, { "epoch": 2.1193339500462534, "grad_norm": 0.09005284309387207, "learning_rate": 0.01, "loss": 2.0185, "step": 20619 }, { "epoch": 2.1196423065063215, "grad_norm": 0.19345355033874512, "learning_rate": 0.01, "loss": 2.0588, "step": 20622 }, { "epoch": 2.119950662966389, "grad_norm": 0.14011520147323608, "learning_rate": 0.01, "loss": 2.0264, "step": 20625 }, { "epoch": 2.120259019426457, "grad_norm": 0.06036897376179695, "learning_rate": 0.01, "loss": 2.0566, "step": 20628 }, { "epoch": 2.120567375886525, "grad_norm": 0.05589490756392479, "learning_rate": 0.01, "loss": 2.0455, "step": 20631 }, { "epoch": 2.1208757323465925, "grad_norm": 0.07571965456008911, "learning_rate": 0.01, "loss": 2.0403, "step": 20634 }, { "epoch": 2.1211840888066607, "grad_norm": 0.05558032542467117, "learning_rate": 0.01, "loss": 2.0373, "step": 20637 }, { "epoch": 2.1214924452667283, "grad_norm": 0.048844993114471436, "learning_rate": 0.01, "loss": 2.0209, "step": 20640 }, { "epoch": 2.121800801726796, "grad_norm": 0.04454483836889267, "learning_rate": 0.01, "loss": 2.0431, "step": 20643 }, { "epoch": 2.122109158186864, "grad_norm": 0.04565678536891937, "learning_rate": 0.01, "loss": 2.0498, "step": 20646 }, { "epoch": 2.1224175146469317, "grad_norm": 0.04082358628511429, "learning_rate": 0.01, "loss": 2.0201, "step": 20649 }, { "epoch": 2.122725871107, "grad_norm": 0.03753536194562912, "learning_rate": 0.01, "loss": 2.0324, "step": 20652 }, { "epoch": 2.1230342275670675, "grad_norm": 0.04898180440068245, "learning_rate": 0.01, "loss": 2.0503, "step": 20655 }, { "epoch": 2.123342584027135, "grad_norm": 0.0748276561498642, "learning_rate": 0.01, "loss": 2.0632, "step": 20658 }, { "epoch": 2.1236509404872033, "grad_norm": 0.19312037527561188, "learning_rate": 0.01, "loss": 2.0409, "step": 20661 }, { "epoch": 2.123959296947271, "grad_norm": 0.1444995105266571, "learning_rate": 0.01, "loss": 2.0713, "step": 20664 }, { "epoch": 2.124267653407339, "grad_norm": 0.03868752345442772, "learning_rate": 0.01, "loss": 2.045, "step": 20667 }, { "epoch": 2.1245760098674067, "grad_norm": 0.04851067438721657, "learning_rate": 0.01, "loss": 2.0347, "step": 20670 }, { "epoch": 2.1248843663274743, "grad_norm": 0.03709336742758751, "learning_rate": 0.01, "loss": 2.0288, "step": 20673 }, { "epoch": 2.1251927227875425, "grad_norm": 0.03498173505067825, "learning_rate": 0.01, "loss": 2.0262, "step": 20676 }, { "epoch": 2.12550107924761, "grad_norm": 0.04838285222649574, "learning_rate": 0.01, "loss": 2.0391, "step": 20679 }, { "epoch": 2.125809435707678, "grad_norm": 0.04562096297740936, "learning_rate": 0.01, "loss": 2.0226, "step": 20682 }, { "epoch": 2.126117792167746, "grad_norm": 0.06128044053912163, "learning_rate": 0.01, "loss": 2.0452, "step": 20685 }, { "epoch": 2.1264261486278135, "grad_norm": 0.04903801530599594, "learning_rate": 0.01, "loss": 2.049, "step": 20688 }, { "epoch": 2.1267345050878816, "grad_norm": 0.038953814655542374, "learning_rate": 0.01, "loss": 2.0559, "step": 20691 }, { "epoch": 2.1270428615479493, "grad_norm": 0.06137779355049133, "learning_rate": 0.01, "loss": 2.0458, "step": 20694 }, { "epoch": 2.1273512180080174, "grad_norm": 0.11357403546571732, "learning_rate": 0.01, "loss": 2.0572, "step": 20697 }, { "epoch": 2.127659574468085, "grad_norm": 0.16223950684070587, "learning_rate": 0.01, "loss": 2.0158, "step": 20700 }, { "epoch": 2.127967930928153, "grad_norm": 0.06391070038080215, "learning_rate": 0.01, "loss": 2.0682, "step": 20703 }, { "epoch": 2.128276287388221, "grad_norm": 0.03951489180326462, "learning_rate": 0.01, "loss": 2.0645, "step": 20706 }, { "epoch": 2.1285846438482885, "grad_norm": 0.03921591490507126, "learning_rate": 0.01, "loss": 2.0538, "step": 20709 }, { "epoch": 2.1288930003083566, "grad_norm": 0.0479004830121994, "learning_rate": 0.01, "loss": 2.0587, "step": 20712 }, { "epoch": 2.1292013567684243, "grad_norm": 0.04370121285319328, "learning_rate": 0.01, "loss": 2.0249, "step": 20715 }, { "epoch": 2.129509713228492, "grad_norm": 0.05750906467437744, "learning_rate": 0.01, "loss": 2.0452, "step": 20718 }, { "epoch": 2.12981806968856, "grad_norm": 0.06549614667892456, "learning_rate": 0.01, "loss": 2.0173, "step": 20721 }, { "epoch": 2.1301264261486277, "grad_norm": 0.05763638764619827, "learning_rate": 0.01, "loss": 2.0315, "step": 20724 }, { "epoch": 2.130434782608696, "grad_norm": 0.16448546946048737, "learning_rate": 0.01, "loss": 2.0352, "step": 20727 }, { "epoch": 2.1307431390687634, "grad_norm": 0.057152118533849716, "learning_rate": 0.01, "loss": 2.0361, "step": 20730 }, { "epoch": 2.1310514955288316, "grad_norm": 0.054196059703826904, "learning_rate": 0.01, "loss": 2.0387, "step": 20733 }, { "epoch": 2.131359851988899, "grad_norm": 0.03887069597840309, "learning_rate": 0.01, "loss": 2.031, "step": 20736 }, { "epoch": 2.131668208448967, "grad_norm": 0.03529683127999306, "learning_rate": 0.01, "loss": 2.0466, "step": 20739 }, { "epoch": 2.131976564909035, "grad_norm": 0.053463347256183624, "learning_rate": 0.01, "loss": 2.011, "step": 20742 }, { "epoch": 2.1322849213691026, "grad_norm": 0.03482777252793312, "learning_rate": 0.01, "loss": 2.0421, "step": 20745 }, { "epoch": 2.1325932778291703, "grad_norm": 0.056043028831481934, "learning_rate": 0.01, "loss": 2.0442, "step": 20748 }, { "epoch": 2.1329016342892384, "grad_norm": 0.04648544266819954, "learning_rate": 0.01, "loss": 2.0502, "step": 20751 }, { "epoch": 2.133209990749306, "grad_norm": 0.0878090187907219, "learning_rate": 0.01, "loss": 2.0092, "step": 20754 }, { "epoch": 2.133518347209374, "grad_norm": 0.059173766523599625, "learning_rate": 0.01, "loss": 2.0469, "step": 20757 }, { "epoch": 2.133826703669442, "grad_norm": 0.09119824320077896, "learning_rate": 0.01, "loss": 2.0448, "step": 20760 }, { "epoch": 2.13413506012951, "grad_norm": 0.04082552343606949, "learning_rate": 0.01, "loss": 2.0501, "step": 20763 }, { "epoch": 2.1344434165895776, "grad_norm": 0.0831313356757164, "learning_rate": 0.01, "loss": 2.0293, "step": 20766 }, { "epoch": 2.1347517730496453, "grad_norm": 0.0635252296924591, "learning_rate": 0.01, "loss": 2.0495, "step": 20769 }, { "epoch": 2.1350601295097134, "grad_norm": 0.09516814351081848, "learning_rate": 0.01, "loss": 2.0284, "step": 20772 }, { "epoch": 2.135368485969781, "grad_norm": 0.05734236165881157, "learning_rate": 0.01, "loss": 2.0341, "step": 20775 }, { "epoch": 2.1356768424298487, "grad_norm": 0.07606332749128342, "learning_rate": 0.01, "loss": 2.0388, "step": 20778 }, { "epoch": 2.135985198889917, "grad_norm": 0.041968777775764465, "learning_rate": 0.01, "loss": 2.0172, "step": 20781 }, { "epoch": 2.1362935553499844, "grad_norm": 0.10549111664295197, "learning_rate": 0.01, "loss": 2.0695, "step": 20784 }, { "epoch": 2.1366019118100525, "grad_norm": 0.0783652663230896, "learning_rate": 0.01, "loss": 2.0416, "step": 20787 }, { "epoch": 2.13691026827012, "grad_norm": 0.08309295773506165, "learning_rate": 0.01, "loss": 2.0455, "step": 20790 }, { "epoch": 2.1372186247301883, "grad_norm": 0.038408368825912476, "learning_rate": 0.01, "loss": 2.0298, "step": 20793 }, { "epoch": 2.137526981190256, "grad_norm": 0.1311808079481125, "learning_rate": 0.01, "loss": 2.0608, "step": 20796 }, { "epoch": 2.1378353376503236, "grad_norm": 0.08983028680086136, "learning_rate": 0.01, "loss": 2.0357, "step": 20799 }, { "epoch": 2.1381436941103917, "grad_norm": 0.12422356754541397, "learning_rate": 0.01, "loss": 2.0443, "step": 20802 }, { "epoch": 2.1384520505704594, "grad_norm": 0.0687076672911644, "learning_rate": 0.01, "loss": 2.0224, "step": 20805 }, { "epoch": 2.138760407030527, "grad_norm": 0.04577179625630379, "learning_rate": 0.01, "loss": 2.0195, "step": 20808 }, { "epoch": 2.139068763490595, "grad_norm": 0.046294886618852615, "learning_rate": 0.01, "loss": 2.028, "step": 20811 }, { "epoch": 2.139377119950663, "grad_norm": 0.048477739095687866, "learning_rate": 0.01, "loss": 2.055, "step": 20814 }, { "epoch": 2.139685476410731, "grad_norm": 0.035206399857997894, "learning_rate": 0.01, "loss": 2.0242, "step": 20817 }, { "epoch": 2.1399938328707986, "grad_norm": 0.0451078936457634, "learning_rate": 0.01, "loss": 2.0316, "step": 20820 }, { "epoch": 2.1403021893308667, "grad_norm": 0.09469619393348694, "learning_rate": 0.01, "loss": 2.0259, "step": 20823 }, { "epoch": 2.1406105457909343, "grad_norm": 0.03966006636619568, "learning_rate": 0.01, "loss": 2.0427, "step": 20826 }, { "epoch": 2.140918902251002, "grad_norm": 0.0793539360165596, "learning_rate": 0.01, "loss": 2.0469, "step": 20829 }, { "epoch": 2.14122725871107, "grad_norm": 0.08547502756118774, "learning_rate": 0.01, "loss": 2.0189, "step": 20832 }, { "epoch": 2.1415356151711378, "grad_norm": 0.055916544049978256, "learning_rate": 0.01, "loss": 2.042, "step": 20835 }, { "epoch": 2.141843971631206, "grad_norm": 0.0369451642036438, "learning_rate": 0.01, "loss": 2.0346, "step": 20838 }, { "epoch": 2.1421523280912735, "grad_norm": 0.05001531541347504, "learning_rate": 0.01, "loss": 2.06, "step": 20841 }, { "epoch": 2.142460684551341, "grad_norm": 0.09491688758134842, "learning_rate": 0.01, "loss": 2.0092, "step": 20844 }, { "epoch": 2.1427690410114093, "grad_norm": 0.05615471303462982, "learning_rate": 0.01, "loss": 2.0575, "step": 20847 }, { "epoch": 2.143077397471477, "grad_norm": 0.08531193435192108, "learning_rate": 0.01, "loss": 2.0562, "step": 20850 }, { "epoch": 2.143385753931545, "grad_norm": 0.08138839155435562, "learning_rate": 0.01, "loss": 2.0482, "step": 20853 }, { "epoch": 2.1436941103916127, "grad_norm": 0.04965364560484886, "learning_rate": 0.01, "loss": 2.0487, "step": 20856 }, { "epoch": 2.1440024668516804, "grad_norm": 0.07006839662790298, "learning_rate": 0.01, "loss": 2.0469, "step": 20859 }, { "epoch": 2.1443108233117485, "grad_norm": 0.07477371394634247, "learning_rate": 0.01, "loss": 2.0452, "step": 20862 }, { "epoch": 2.144619179771816, "grad_norm": 0.07817060500383377, "learning_rate": 0.01, "loss": 2.0239, "step": 20865 }, { "epoch": 2.1449275362318843, "grad_norm": 0.04393570497632027, "learning_rate": 0.01, "loss": 2.0261, "step": 20868 }, { "epoch": 2.145235892691952, "grad_norm": 0.0777692198753357, "learning_rate": 0.01, "loss": 2.0313, "step": 20871 }, { "epoch": 2.1455442491520196, "grad_norm": 0.05267953500151634, "learning_rate": 0.01, "loss": 2.0496, "step": 20874 }, { "epoch": 2.1458526056120877, "grad_norm": 0.053839076310396194, "learning_rate": 0.01, "loss": 2.0593, "step": 20877 }, { "epoch": 2.1461609620721553, "grad_norm": 0.05793678015470505, "learning_rate": 0.01, "loss": 2.0333, "step": 20880 }, { "epoch": 2.1464693185322234, "grad_norm": 0.045539624989032745, "learning_rate": 0.01, "loss": 2.0504, "step": 20883 }, { "epoch": 2.146777674992291, "grad_norm": 0.062209442257881165, "learning_rate": 0.01, "loss": 2.0273, "step": 20886 }, { "epoch": 2.1470860314523588, "grad_norm": 0.11053802073001862, "learning_rate": 0.01, "loss": 2.0764, "step": 20889 }, { "epoch": 2.147394387912427, "grad_norm": 0.037597738206386566, "learning_rate": 0.01, "loss": 2.046, "step": 20892 }, { "epoch": 2.1477027443724945, "grad_norm": 0.050160784274339676, "learning_rate": 0.01, "loss": 2.0797, "step": 20895 }, { "epoch": 2.1480111008325626, "grad_norm": 0.053438689559698105, "learning_rate": 0.01, "loss": 2.0611, "step": 20898 }, { "epoch": 2.1483194572926303, "grad_norm": 0.06175898388028145, "learning_rate": 0.01, "loss": 2.0558, "step": 20901 }, { "epoch": 2.148627813752698, "grad_norm": 0.062430836260318756, "learning_rate": 0.01, "loss": 2.0405, "step": 20904 }, { "epoch": 2.148936170212766, "grad_norm": 0.09347888082265854, "learning_rate": 0.01, "loss": 2.0482, "step": 20907 }, { "epoch": 2.1492445266728337, "grad_norm": 0.13494189083576202, "learning_rate": 0.01, "loss": 2.0483, "step": 20910 }, { "epoch": 2.149552883132902, "grad_norm": 0.06735777854919434, "learning_rate": 0.01, "loss": 2.0799, "step": 20913 }, { "epoch": 2.1498612395929695, "grad_norm": 0.08108525723218918, "learning_rate": 0.01, "loss": 2.036, "step": 20916 }, { "epoch": 2.150169596053037, "grad_norm": 0.043221328407526016, "learning_rate": 0.01, "loss": 2.0401, "step": 20919 }, { "epoch": 2.1504779525131053, "grad_norm": 0.046568017452955246, "learning_rate": 0.01, "loss": 2.0393, "step": 20922 }, { "epoch": 2.150786308973173, "grad_norm": 0.040786582976579666, "learning_rate": 0.01, "loss": 2.0527, "step": 20925 }, { "epoch": 2.151094665433241, "grad_norm": 0.043856412172317505, "learning_rate": 0.01, "loss": 2.0405, "step": 20928 }, { "epoch": 2.1514030218933087, "grad_norm": 0.03569160774350166, "learning_rate": 0.01, "loss": 2.0431, "step": 20931 }, { "epoch": 2.1517113783533763, "grad_norm": 0.059598151594400406, "learning_rate": 0.01, "loss": 2.0316, "step": 20934 }, { "epoch": 2.1520197348134444, "grad_norm": 0.12518006563186646, "learning_rate": 0.01, "loss": 2.0389, "step": 20937 }, { "epoch": 2.152328091273512, "grad_norm": 0.04442603886127472, "learning_rate": 0.01, "loss": 2.0413, "step": 20940 }, { "epoch": 2.15263644773358, "grad_norm": 0.1175926923751831, "learning_rate": 0.01, "loss": 2.0307, "step": 20943 }, { "epoch": 2.152944804193648, "grad_norm": 0.09045865386724472, "learning_rate": 0.01, "loss": 2.0291, "step": 20946 }, { "epoch": 2.1532531606537155, "grad_norm": 0.07519534230232239, "learning_rate": 0.01, "loss": 2.048, "step": 20949 }, { "epoch": 2.1535615171137836, "grad_norm": 0.03551176190376282, "learning_rate": 0.01, "loss": 2.0396, "step": 20952 }, { "epoch": 2.1538698735738513, "grad_norm": 0.11210478097200394, "learning_rate": 0.01, "loss": 2.0453, "step": 20955 }, { "epoch": 2.1541782300339194, "grad_norm": 0.09921340644359589, "learning_rate": 0.01, "loss": 2.0295, "step": 20958 }, { "epoch": 2.154486586493987, "grad_norm": 0.04131443426012993, "learning_rate": 0.01, "loss": 2.0295, "step": 20961 }, { "epoch": 2.1547949429540547, "grad_norm": 0.07579990476369858, "learning_rate": 0.01, "loss": 2.041, "step": 20964 }, { "epoch": 2.155103299414123, "grad_norm": 0.04955494403839111, "learning_rate": 0.01, "loss": 2.0409, "step": 20967 }, { "epoch": 2.1554116558741905, "grad_norm": 0.038999974727630615, "learning_rate": 0.01, "loss": 2.0267, "step": 20970 }, { "epoch": 2.1557200123342586, "grad_norm": 0.04656311497092247, "learning_rate": 0.01, "loss": 2.0319, "step": 20973 }, { "epoch": 2.1560283687943262, "grad_norm": 0.08853477984666824, "learning_rate": 0.01, "loss": 2.0573, "step": 20976 }, { "epoch": 2.156336725254394, "grad_norm": 0.07657559216022491, "learning_rate": 0.01, "loss": 2.0455, "step": 20979 }, { "epoch": 2.156645081714462, "grad_norm": 0.03593476489186287, "learning_rate": 0.01, "loss": 2.0365, "step": 20982 }, { "epoch": 2.1569534381745297, "grad_norm": 0.08656775951385498, "learning_rate": 0.01, "loss": 2.0286, "step": 20985 }, { "epoch": 2.1572617946345978, "grad_norm": 0.07760220021009445, "learning_rate": 0.01, "loss": 2.068, "step": 20988 }, { "epoch": 2.1575701510946654, "grad_norm": 0.09167666733264923, "learning_rate": 0.01, "loss": 2.0165, "step": 20991 }, { "epoch": 2.157878507554733, "grad_norm": 0.052361227571964264, "learning_rate": 0.01, "loss": 2.0424, "step": 20994 }, { "epoch": 2.158186864014801, "grad_norm": 0.05431961268186569, "learning_rate": 0.01, "loss": 2.0475, "step": 20997 }, { "epoch": 2.158495220474869, "grad_norm": 0.03956061974167824, "learning_rate": 0.01, "loss": 2.0299, "step": 21000 }, { "epoch": 2.158803576934937, "grad_norm": 0.03906402364373207, "learning_rate": 0.01, "loss": 2.0442, "step": 21003 }, { "epoch": 2.1591119333950046, "grad_norm": 0.07876679301261902, "learning_rate": 0.01, "loss": 2.0369, "step": 21006 }, { "epoch": 2.1594202898550723, "grad_norm": 0.051768235862255096, "learning_rate": 0.01, "loss": 2.0404, "step": 21009 }, { "epoch": 2.1597286463151404, "grad_norm": 0.05242472141981125, "learning_rate": 0.01, "loss": 2.0514, "step": 21012 }, { "epoch": 2.160037002775208, "grad_norm": 0.07014864683151245, "learning_rate": 0.01, "loss": 2.0285, "step": 21015 }, { "epoch": 2.160345359235276, "grad_norm": 0.1024969145655632, "learning_rate": 0.01, "loss": 2.031, "step": 21018 }, { "epoch": 2.160653715695344, "grad_norm": 0.059565525501966476, "learning_rate": 0.01, "loss": 2.0334, "step": 21021 }, { "epoch": 2.1609620721554115, "grad_norm": 0.04438649117946625, "learning_rate": 0.01, "loss": 2.0736, "step": 21024 }, { "epoch": 2.1612704286154796, "grad_norm": 0.06552638858556747, "learning_rate": 0.01, "loss": 2.0449, "step": 21027 }, { "epoch": 2.1615787850755472, "grad_norm": 0.044327035546302795, "learning_rate": 0.01, "loss": 2.0512, "step": 21030 }, { "epoch": 2.1618871415356153, "grad_norm": 0.04915094003081322, "learning_rate": 0.01, "loss": 2.0634, "step": 21033 }, { "epoch": 2.162195497995683, "grad_norm": 0.052909743040800095, "learning_rate": 0.01, "loss": 2.0408, "step": 21036 }, { "epoch": 2.1625038544557507, "grad_norm": 0.09883973747491837, "learning_rate": 0.01, "loss": 2.0107, "step": 21039 }, { "epoch": 2.1628122109158188, "grad_norm": 0.07719819992780685, "learning_rate": 0.01, "loss": 2.0357, "step": 21042 }, { "epoch": 2.1631205673758864, "grad_norm": 0.09743590652942657, "learning_rate": 0.01, "loss": 2.0263, "step": 21045 }, { "epoch": 2.1634289238359545, "grad_norm": 0.06583153456449509, "learning_rate": 0.01, "loss": 2.0249, "step": 21048 }, { "epoch": 2.163737280296022, "grad_norm": 0.10464660078287125, "learning_rate": 0.01, "loss": 2.0154, "step": 21051 }, { "epoch": 2.16404563675609, "grad_norm": 0.057555560022592545, "learning_rate": 0.01, "loss": 2.0545, "step": 21054 }, { "epoch": 2.164353993216158, "grad_norm": 0.04753732308745384, "learning_rate": 0.01, "loss": 2.054, "step": 21057 }, { "epoch": 2.1646623496762256, "grad_norm": 0.05236852541565895, "learning_rate": 0.01, "loss": 2.0336, "step": 21060 }, { "epoch": 2.1649707061362937, "grad_norm": 0.05231897532939911, "learning_rate": 0.01, "loss": 2.0303, "step": 21063 }, { "epoch": 2.1652790625963614, "grad_norm": 0.058431778103113174, "learning_rate": 0.01, "loss": 2.0387, "step": 21066 }, { "epoch": 2.165587419056429, "grad_norm": 0.04048459604382515, "learning_rate": 0.01, "loss": 2.0317, "step": 21069 }, { "epoch": 2.165895775516497, "grad_norm": 0.0941488966345787, "learning_rate": 0.01, "loss": 2.0113, "step": 21072 }, { "epoch": 2.166204131976565, "grad_norm": 0.04498032480478287, "learning_rate": 0.01, "loss": 2.0178, "step": 21075 }, { "epoch": 2.166512488436633, "grad_norm": 0.03668253496289253, "learning_rate": 0.01, "loss": 2.0354, "step": 21078 }, { "epoch": 2.1668208448967006, "grad_norm": 0.06661085039377213, "learning_rate": 0.01, "loss": 2.0221, "step": 21081 }, { "epoch": 2.1671292013567682, "grad_norm": 0.06425791233778, "learning_rate": 0.01, "loss": 2.0301, "step": 21084 }, { "epoch": 2.1674375578168363, "grad_norm": 0.046152301132678986, "learning_rate": 0.01, "loss": 2.0287, "step": 21087 }, { "epoch": 2.167745914276904, "grad_norm": 0.12584535777568817, "learning_rate": 0.01, "loss": 2.0364, "step": 21090 }, { "epoch": 2.168054270736972, "grad_norm": 0.04133312404155731, "learning_rate": 0.01, "loss": 2.0403, "step": 21093 }, { "epoch": 2.1683626271970398, "grad_norm": 0.04181768745183945, "learning_rate": 0.01, "loss": 2.0241, "step": 21096 }, { "epoch": 2.1686709836571074, "grad_norm": 0.04648204892873764, "learning_rate": 0.01, "loss": 2.0307, "step": 21099 }, { "epoch": 2.1689793401171755, "grad_norm": 0.044040024280548096, "learning_rate": 0.01, "loss": 2.0495, "step": 21102 }, { "epoch": 2.169287696577243, "grad_norm": 0.04033771902322769, "learning_rate": 0.01, "loss": 2.0496, "step": 21105 }, { "epoch": 2.1695960530373113, "grad_norm": 0.04693743214011192, "learning_rate": 0.01, "loss": 2.0492, "step": 21108 }, { "epoch": 2.169904409497379, "grad_norm": 0.07494813948869705, "learning_rate": 0.01, "loss": 2.0462, "step": 21111 }, { "epoch": 2.1702127659574466, "grad_norm": 0.09254445135593414, "learning_rate": 0.01, "loss": 2.0478, "step": 21114 }, { "epoch": 2.1705211224175147, "grad_norm": 0.13350296020507812, "learning_rate": 0.01, "loss": 2.0277, "step": 21117 }, { "epoch": 2.1708294788775824, "grad_norm": 0.051686085760593414, "learning_rate": 0.01, "loss": 2.0507, "step": 21120 }, { "epoch": 2.1711378353376505, "grad_norm": 0.043330930173397064, "learning_rate": 0.01, "loss": 2.0261, "step": 21123 }, { "epoch": 2.171446191797718, "grad_norm": 0.05219477042555809, "learning_rate": 0.01, "loss": 2.0299, "step": 21126 }, { "epoch": 2.171754548257786, "grad_norm": 0.04195651784539223, "learning_rate": 0.01, "loss": 2.0441, "step": 21129 }, { "epoch": 2.172062904717854, "grad_norm": 0.15802520513534546, "learning_rate": 0.01, "loss": 2.0609, "step": 21132 }, { "epoch": 2.1723712611779216, "grad_norm": 0.08619748800992966, "learning_rate": 0.01, "loss": 2.0542, "step": 21135 }, { "epoch": 2.1726796176379897, "grad_norm": 0.08852280676364899, "learning_rate": 0.01, "loss": 2.0287, "step": 21138 }, { "epoch": 2.1729879740980573, "grad_norm": 0.040646422654390335, "learning_rate": 0.01, "loss": 2.0289, "step": 21141 }, { "epoch": 2.173296330558125, "grad_norm": 0.06913924962282181, "learning_rate": 0.01, "loss": 2.0463, "step": 21144 }, { "epoch": 2.173604687018193, "grad_norm": 0.05461576208472252, "learning_rate": 0.01, "loss": 2.0382, "step": 21147 }, { "epoch": 2.1739130434782608, "grad_norm": 0.08613748103380203, "learning_rate": 0.01, "loss": 2.0412, "step": 21150 }, { "epoch": 2.174221399938329, "grad_norm": 0.06459856033325195, "learning_rate": 0.01, "loss": 2.0445, "step": 21153 }, { "epoch": 2.1745297563983965, "grad_norm": 0.08450223505496979, "learning_rate": 0.01, "loss": 2.0237, "step": 21156 }, { "epoch": 2.174838112858464, "grad_norm": 0.10049585998058319, "learning_rate": 0.01, "loss": 2.0458, "step": 21159 }, { "epoch": 2.1751464693185323, "grad_norm": 0.06031005084514618, "learning_rate": 0.01, "loss": 2.0668, "step": 21162 }, { "epoch": 2.1754548257786, "grad_norm": 0.06921012699604034, "learning_rate": 0.01, "loss": 2.053, "step": 21165 }, { "epoch": 2.175763182238668, "grad_norm": 0.044479697942733765, "learning_rate": 0.01, "loss": 2.0275, "step": 21168 }, { "epoch": 2.1760715386987357, "grad_norm": 0.0857187807559967, "learning_rate": 0.01, "loss": 2.0513, "step": 21171 }, { "epoch": 2.176379895158804, "grad_norm": 0.057432882487773895, "learning_rate": 0.01, "loss": 2.0622, "step": 21174 }, { "epoch": 2.1766882516188715, "grad_norm": 0.0905427411198616, "learning_rate": 0.01, "loss": 2.0574, "step": 21177 }, { "epoch": 2.176996608078939, "grad_norm": 0.05289644002914429, "learning_rate": 0.01, "loss": 2.0348, "step": 21180 }, { "epoch": 2.1773049645390072, "grad_norm": 0.06351148337125778, "learning_rate": 0.01, "loss": 2.0223, "step": 21183 }, { "epoch": 2.177613320999075, "grad_norm": 0.1098824068903923, "learning_rate": 0.01, "loss": 2.0534, "step": 21186 }, { "epoch": 2.1779216774591426, "grad_norm": 0.03698734566569328, "learning_rate": 0.01, "loss": 2.0265, "step": 21189 }, { "epoch": 2.1782300339192107, "grad_norm": 0.09595025330781937, "learning_rate": 0.01, "loss": 2.0596, "step": 21192 }, { "epoch": 2.1785383903792783, "grad_norm": 0.05725647136569023, "learning_rate": 0.01, "loss": 2.0568, "step": 21195 }, { "epoch": 2.1788467468393464, "grad_norm": 0.06952492892742157, "learning_rate": 0.01, "loss": 2.0119, "step": 21198 }, { "epoch": 2.179155103299414, "grad_norm": 0.06831461936235428, "learning_rate": 0.01, "loss": 2.0484, "step": 21201 }, { "epoch": 2.179463459759482, "grad_norm": 0.05125569924712181, "learning_rate": 0.01, "loss": 2.0593, "step": 21204 }, { "epoch": 2.17977181621955, "grad_norm": 0.053290076553821564, "learning_rate": 0.01, "loss": 2.0576, "step": 21207 }, { "epoch": 2.1800801726796175, "grad_norm": 0.05718007683753967, "learning_rate": 0.01, "loss": 2.0472, "step": 21210 }, { "epoch": 2.1803885291396856, "grad_norm": 0.0895228236913681, "learning_rate": 0.01, "loss": 2.0673, "step": 21213 }, { "epoch": 2.1806968855997533, "grad_norm": 0.05205732583999634, "learning_rate": 0.01, "loss": 2.0366, "step": 21216 }, { "epoch": 2.181005242059821, "grad_norm": 0.05785641819238663, "learning_rate": 0.01, "loss": 2.0557, "step": 21219 }, { "epoch": 2.181313598519889, "grad_norm": 0.0756557285785675, "learning_rate": 0.01, "loss": 2.0431, "step": 21222 }, { "epoch": 2.1816219549799567, "grad_norm": 0.07016823440790176, "learning_rate": 0.01, "loss": 2.0508, "step": 21225 }, { "epoch": 2.181930311440025, "grad_norm": 0.06859459728002548, "learning_rate": 0.01, "loss": 2.0418, "step": 21228 }, { "epoch": 2.1822386679000925, "grad_norm": 0.14140251278877258, "learning_rate": 0.01, "loss": 2.0473, "step": 21231 }, { "epoch": 2.1825470243601606, "grad_norm": 0.0544712096452713, "learning_rate": 0.01, "loss": 2.04, "step": 21234 }, { "epoch": 2.1828553808202282, "grad_norm": 0.04346593841910362, "learning_rate": 0.01, "loss": 2.0516, "step": 21237 }, { "epoch": 2.183163737280296, "grad_norm": 0.04261700063943863, "learning_rate": 0.01, "loss": 2.0431, "step": 21240 }, { "epoch": 2.183472093740364, "grad_norm": 0.04763154685497284, "learning_rate": 0.01, "loss": 2.0196, "step": 21243 }, { "epoch": 2.1837804502004317, "grad_norm": 0.06876801699399948, "learning_rate": 0.01, "loss": 2.0343, "step": 21246 }, { "epoch": 2.1840888066604993, "grad_norm": 0.07819974422454834, "learning_rate": 0.01, "loss": 2.0506, "step": 21249 }, { "epoch": 2.1843971631205674, "grad_norm": 0.06239667907357216, "learning_rate": 0.01, "loss": 2.0268, "step": 21252 }, { "epoch": 2.184705519580635, "grad_norm": 0.1095786988735199, "learning_rate": 0.01, "loss": 2.0398, "step": 21255 }, { "epoch": 2.185013876040703, "grad_norm": 0.055070340633392334, "learning_rate": 0.01, "loss": 2.0523, "step": 21258 }, { "epoch": 2.185322232500771, "grad_norm": 0.08038482069969177, "learning_rate": 0.01, "loss": 2.0222, "step": 21261 }, { "epoch": 2.185630588960839, "grad_norm": 0.06929390877485275, "learning_rate": 0.01, "loss": 2.0405, "step": 21264 }, { "epoch": 2.1859389454209066, "grad_norm": 0.054179031401872635, "learning_rate": 0.01, "loss": 2.0554, "step": 21267 }, { "epoch": 2.1862473018809743, "grad_norm": 0.06956303864717484, "learning_rate": 0.01, "loss": 2.0586, "step": 21270 }, { "epoch": 2.1865556583410424, "grad_norm": 0.14279653131961823, "learning_rate": 0.01, "loss": 2.0381, "step": 21273 }, { "epoch": 2.18686401480111, "grad_norm": 0.04268357530236244, "learning_rate": 0.01, "loss": 2.0404, "step": 21276 }, { "epoch": 2.1871723712611777, "grad_norm": 0.05189354717731476, "learning_rate": 0.01, "loss": 2.0405, "step": 21279 }, { "epoch": 2.187480727721246, "grad_norm": 0.05047158896923065, "learning_rate": 0.01, "loss": 2.0495, "step": 21282 }, { "epoch": 2.1877890841813135, "grad_norm": 0.041077565401792526, "learning_rate": 0.01, "loss": 2.0558, "step": 21285 }, { "epoch": 2.1880974406413816, "grad_norm": 0.03880000859498978, "learning_rate": 0.01, "loss": 2.0472, "step": 21288 }, { "epoch": 2.1884057971014492, "grad_norm": 0.1096898689866066, "learning_rate": 0.01, "loss": 2.0537, "step": 21291 }, { "epoch": 2.1887141535615173, "grad_norm": 0.04502374678850174, "learning_rate": 0.01, "loss": 2.0397, "step": 21294 }, { "epoch": 2.189022510021585, "grad_norm": 0.037158042192459106, "learning_rate": 0.01, "loss": 2.0117, "step": 21297 }, { "epoch": 2.1893308664816526, "grad_norm": 0.03381425887346268, "learning_rate": 0.01, "loss": 2.0297, "step": 21300 }, { "epoch": 2.1896392229417208, "grad_norm": 0.05572035536170006, "learning_rate": 0.01, "loss": 2.0575, "step": 21303 }, { "epoch": 2.1899475794017884, "grad_norm": 0.06287326663732529, "learning_rate": 0.01, "loss": 2.0341, "step": 21306 }, { "epoch": 2.190255935861856, "grad_norm": 0.07691732794046402, "learning_rate": 0.01, "loss": 2.0317, "step": 21309 }, { "epoch": 2.190564292321924, "grad_norm": 0.058651309460401535, "learning_rate": 0.01, "loss": 2.04, "step": 21312 }, { "epoch": 2.190872648781992, "grad_norm": 0.033279962837696075, "learning_rate": 0.01, "loss": 2.0325, "step": 21315 }, { "epoch": 2.19118100524206, "grad_norm": 0.08742087334394455, "learning_rate": 0.01, "loss": 2.0303, "step": 21318 }, { "epoch": 2.1914893617021276, "grad_norm": 0.0864923968911171, "learning_rate": 0.01, "loss": 2.0492, "step": 21321 }, { "epoch": 2.1917977181621957, "grad_norm": 0.10759606957435608, "learning_rate": 0.01, "loss": 2.0582, "step": 21324 }, { "epoch": 2.1921060746222634, "grad_norm": 0.058335281908512115, "learning_rate": 0.01, "loss": 2.019, "step": 21327 }, { "epoch": 2.192414431082331, "grad_norm": 0.04506481811404228, "learning_rate": 0.01, "loss": 2.027, "step": 21330 }, { "epoch": 2.192722787542399, "grad_norm": 0.0454195998609066, "learning_rate": 0.01, "loss": 2.0297, "step": 21333 }, { "epoch": 2.193031144002467, "grad_norm": 0.051547158509492874, "learning_rate": 0.01, "loss": 1.9814, "step": 21336 }, { "epoch": 2.193339500462535, "grad_norm": 0.09826447069644928, "learning_rate": 0.01, "loss": 2.0518, "step": 21339 }, { "epoch": 2.1936478569226026, "grad_norm": 0.05799272283911705, "learning_rate": 0.01, "loss": 2.0649, "step": 21342 }, { "epoch": 2.19395621338267, "grad_norm": 0.12493482232093811, "learning_rate": 0.01, "loss": 2.0257, "step": 21345 }, { "epoch": 2.1942645698427383, "grad_norm": 0.04930184409022331, "learning_rate": 0.01, "loss": 2.0228, "step": 21348 }, { "epoch": 2.194572926302806, "grad_norm": 0.04257272928953171, "learning_rate": 0.01, "loss": 2.0579, "step": 21351 }, { "epoch": 2.194881282762874, "grad_norm": 0.04625258222222328, "learning_rate": 0.01, "loss": 2.0244, "step": 21354 }, { "epoch": 2.1951896392229417, "grad_norm": 0.04295830428600311, "learning_rate": 0.01, "loss": 2.032, "step": 21357 }, { "epoch": 2.1954979956830094, "grad_norm": 0.21663565933704376, "learning_rate": 0.01, "loss": 2.0489, "step": 21360 }, { "epoch": 2.1958063521430775, "grad_norm": 0.15513652563095093, "learning_rate": 0.01, "loss": 2.0473, "step": 21363 }, { "epoch": 2.196114708603145, "grad_norm": 0.08245841413736343, "learning_rate": 0.01, "loss": 2.0402, "step": 21366 }, { "epoch": 2.1964230650632133, "grad_norm": 0.03768035024404526, "learning_rate": 0.01, "loss": 2.0444, "step": 21369 }, { "epoch": 2.196731421523281, "grad_norm": 0.0586925707757473, "learning_rate": 0.01, "loss": 2.0299, "step": 21372 }, { "epoch": 2.1970397779833486, "grad_norm": 0.045760899782180786, "learning_rate": 0.01, "loss": 2.0484, "step": 21375 }, { "epoch": 2.1973481344434167, "grad_norm": 0.04357283189892769, "learning_rate": 0.01, "loss": 2.0231, "step": 21378 }, { "epoch": 2.1976564909034844, "grad_norm": 0.04477246478199959, "learning_rate": 0.01, "loss": 2.0211, "step": 21381 }, { "epoch": 2.1979648473635525, "grad_norm": 0.06785521656274796, "learning_rate": 0.01, "loss": 2.0532, "step": 21384 }, { "epoch": 2.19827320382362, "grad_norm": 0.04677508771419525, "learning_rate": 0.01, "loss": 2.0222, "step": 21387 }, { "epoch": 2.198581560283688, "grad_norm": 0.049355942755937576, "learning_rate": 0.01, "loss": 2.0488, "step": 21390 }, { "epoch": 2.198889916743756, "grad_norm": 0.11005277186632156, "learning_rate": 0.01, "loss": 2.0198, "step": 21393 }, { "epoch": 2.1991982732038236, "grad_norm": 0.10170245915651321, "learning_rate": 0.01, "loss": 2.0021, "step": 21396 }, { "epoch": 2.1995066296638917, "grad_norm": 0.045158207416534424, "learning_rate": 0.01, "loss": 2.0462, "step": 21399 }, { "epoch": 2.1998149861239593, "grad_norm": 0.0780436098575592, "learning_rate": 0.01, "loss": 2.0292, "step": 21402 }, { "epoch": 2.200123342584027, "grad_norm": 0.06062453240156174, "learning_rate": 0.01, "loss": 2.0437, "step": 21405 }, { "epoch": 2.200431699044095, "grad_norm": 0.09208519756793976, "learning_rate": 0.01, "loss": 2.0378, "step": 21408 }, { "epoch": 2.2007400555041627, "grad_norm": 0.05279922112822533, "learning_rate": 0.01, "loss": 2.0521, "step": 21411 }, { "epoch": 2.201048411964231, "grad_norm": 0.0831415057182312, "learning_rate": 0.01, "loss": 2.0394, "step": 21414 }, { "epoch": 2.2013567684242985, "grad_norm": 0.06481669098138809, "learning_rate": 0.01, "loss": 2.0119, "step": 21417 }, { "epoch": 2.201665124884366, "grad_norm": 0.08297551423311234, "learning_rate": 0.01, "loss": 2.0741, "step": 21420 }, { "epoch": 2.2019734813444343, "grad_norm": 0.06962350755929947, "learning_rate": 0.01, "loss": 2.0434, "step": 21423 }, { "epoch": 2.202281837804502, "grad_norm": 0.08055757731199265, "learning_rate": 0.01, "loss": 2.0334, "step": 21426 }, { "epoch": 2.20259019426457, "grad_norm": 0.09755895286798477, "learning_rate": 0.01, "loss": 2.0443, "step": 21429 }, { "epoch": 2.2028985507246377, "grad_norm": 0.04400679096579552, "learning_rate": 0.01, "loss": 2.0194, "step": 21432 }, { "epoch": 2.2032069071847054, "grad_norm": 0.04204344376921654, "learning_rate": 0.01, "loss": 2.063, "step": 21435 }, { "epoch": 2.2035152636447735, "grad_norm": 0.029974184930324554, "learning_rate": 0.01, "loss": 2.0211, "step": 21438 }, { "epoch": 2.203823620104841, "grad_norm": 0.07232589274644852, "learning_rate": 0.01, "loss": 2.0254, "step": 21441 }, { "epoch": 2.2041319765649092, "grad_norm": 0.06404844671487808, "learning_rate": 0.01, "loss": 2.0596, "step": 21444 }, { "epoch": 2.204440333024977, "grad_norm": 0.08751700818538666, "learning_rate": 0.01, "loss": 2.0576, "step": 21447 }, { "epoch": 2.2047486894850445, "grad_norm": 0.0371503084897995, "learning_rate": 0.01, "loss": 2.0424, "step": 21450 }, { "epoch": 2.2050570459451126, "grad_norm": 0.06034844368696213, "learning_rate": 0.01, "loss": 2.0306, "step": 21453 }, { "epoch": 2.2053654024051803, "grad_norm": 0.04261939972639084, "learning_rate": 0.01, "loss": 2.0323, "step": 21456 }, { "epoch": 2.2056737588652484, "grad_norm": 0.0612785667181015, "learning_rate": 0.01, "loss": 2.0349, "step": 21459 }, { "epoch": 2.205982115325316, "grad_norm": 0.05828654393553734, "learning_rate": 0.01, "loss": 2.0231, "step": 21462 }, { "epoch": 2.2062904717853837, "grad_norm": 0.06474754214286804, "learning_rate": 0.01, "loss": 2.0585, "step": 21465 }, { "epoch": 2.206598828245452, "grad_norm": 0.04646962508559227, "learning_rate": 0.01, "loss": 2.0337, "step": 21468 }, { "epoch": 2.2069071847055195, "grad_norm": 0.07051596790552139, "learning_rate": 0.01, "loss": 2.0218, "step": 21471 }, { "epoch": 2.2072155411655876, "grad_norm": 0.05658755078911781, "learning_rate": 0.01, "loss": 2.0347, "step": 21474 }, { "epoch": 2.2075238976256553, "grad_norm": 0.039348311722278595, "learning_rate": 0.01, "loss": 2.0587, "step": 21477 }, { "epoch": 2.207832254085723, "grad_norm": 0.030550241470336914, "learning_rate": 0.01, "loss": 2.0412, "step": 21480 }, { "epoch": 2.208140610545791, "grad_norm": 0.11341346800327301, "learning_rate": 0.01, "loss": 2.0416, "step": 21483 }, { "epoch": 2.2084489670058587, "grad_norm": 0.07061111927032471, "learning_rate": 0.01, "loss": 2.0117, "step": 21486 }, { "epoch": 2.208757323465927, "grad_norm": 0.10256624966859818, "learning_rate": 0.01, "loss": 2.0482, "step": 21489 }, { "epoch": 2.2090656799259945, "grad_norm": 0.06658724695444107, "learning_rate": 0.01, "loss": 2.0544, "step": 21492 }, { "epoch": 2.209374036386062, "grad_norm": 0.12220150977373123, "learning_rate": 0.01, "loss": 2.0399, "step": 21495 }, { "epoch": 2.20968239284613, "grad_norm": 0.05570116639137268, "learning_rate": 0.01, "loss": 2.0238, "step": 21498 }, { "epoch": 2.209990749306198, "grad_norm": 0.04273837059736252, "learning_rate": 0.01, "loss": 2.032, "step": 21501 }, { "epoch": 2.210299105766266, "grad_norm": 0.04138748720288277, "learning_rate": 0.01, "loss": 2.0497, "step": 21504 }, { "epoch": 2.2106074622263336, "grad_norm": 0.08082164078950882, "learning_rate": 0.01, "loss": 2.025, "step": 21507 }, { "epoch": 2.2109158186864013, "grad_norm": 0.06663049012422562, "learning_rate": 0.01, "loss": 2.0298, "step": 21510 }, { "epoch": 2.2112241751464694, "grad_norm": 0.0444667711853981, "learning_rate": 0.01, "loss": 2.0469, "step": 21513 }, { "epoch": 2.211532531606537, "grad_norm": 0.04407314583659172, "learning_rate": 0.01, "loss": 2.0667, "step": 21516 }, { "epoch": 2.211840888066605, "grad_norm": 0.03877383843064308, "learning_rate": 0.01, "loss": 2.0329, "step": 21519 }, { "epoch": 2.212149244526673, "grad_norm": 0.059297189116477966, "learning_rate": 0.01, "loss": 2.0441, "step": 21522 }, { "epoch": 2.2124576009867405, "grad_norm": 0.06609878689050674, "learning_rate": 0.01, "loss": 2.0263, "step": 21525 }, { "epoch": 2.2127659574468086, "grad_norm": 0.06935823708772659, "learning_rate": 0.01, "loss": 2.0295, "step": 21528 }, { "epoch": 2.2130743139068763, "grad_norm": 0.07610715180635452, "learning_rate": 0.01, "loss": 2.0424, "step": 21531 }, { "epoch": 2.2133826703669444, "grad_norm": 0.10587569326162338, "learning_rate": 0.01, "loss": 2.0636, "step": 21534 }, { "epoch": 2.213691026827012, "grad_norm": 0.05116620659828186, "learning_rate": 0.01, "loss": 2.0232, "step": 21537 }, { "epoch": 2.2139993832870797, "grad_norm": 0.03773776814341545, "learning_rate": 0.01, "loss": 2.0438, "step": 21540 }, { "epoch": 2.214307739747148, "grad_norm": 0.05412130430340767, "learning_rate": 0.01, "loss": 2.0522, "step": 21543 }, { "epoch": 2.2146160962072154, "grad_norm": 0.03664164990186691, "learning_rate": 0.01, "loss": 2.0535, "step": 21546 }, { "epoch": 2.2149244526672835, "grad_norm": 0.04415920004248619, "learning_rate": 0.01, "loss": 2.0113, "step": 21549 }, { "epoch": 2.215232809127351, "grad_norm": 0.05737615004181862, "learning_rate": 0.01, "loss": 2.0397, "step": 21552 }, { "epoch": 2.215541165587419, "grad_norm": 0.032385457307100296, "learning_rate": 0.01, "loss": 2.0325, "step": 21555 }, { "epoch": 2.215849522047487, "grad_norm": 0.0982925221323967, "learning_rate": 0.01, "loss": 2.0653, "step": 21558 }, { "epoch": 2.2161578785075546, "grad_norm": 0.03911735862493515, "learning_rate": 0.01, "loss": 2.0346, "step": 21561 }, { "epoch": 2.2164662349676227, "grad_norm": 0.07814744859933853, "learning_rate": 0.01, "loss": 2.0445, "step": 21564 }, { "epoch": 2.2167745914276904, "grad_norm": 0.05368256941437721, "learning_rate": 0.01, "loss": 2.0689, "step": 21567 }, { "epoch": 2.217082947887758, "grad_norm": 0.046178530901670456, "learning_rate": 0.01, "loss": 2.0402, "step": 21570 }, { "epoch": 2.217391304347826, "grad_norm": 0.047109801322221756, "learning_rate": 0.01, "loss": 2.0721, "step": 21573 }, { "epoch": 2.217699660807894, "grad_norm": 0.05443650484085083, "learning_rate": 0.01, "loss": 2.0416, "step": 21576 }, { "epoch": 2.218008017267962, "grad_norm": 0.13156400620937347, "learning_rate": 0.01, "loss": 2.0581, "step": 21579 }, { "epoch": 2.2183163737280296, "grad_norm": 0.04178638756275177, "learning_rate": 0.01, "loss": 2.0517, "step": 21582 }, { "epoch": 2.2186247301880972, "grad_norm": 0.042627740651369095, "learning_rate": 0.01, "loss": 2.0541, "step": 21585 }, { "epoch": 2.2189330866481654, "grad_norm": 0.05318658426403999, "learning_rate": 0.01, "loss": 2.0361, "step": 21588 }, { "epoch": 2.219241443108233, "grad_norm": 0.061288055032491684, "learning_rate": 0.01, "loss": 2.0425, "step": 21591 }, { "epoch": 2.219549799568301, "grad_norm": 0.06663260608911514, "learning_rate": 0.01, "loss": 2.0549, "step": 21594 }, { "epoch": 2.219858156028369, "grad_norm": 0.04567466303706169, "learning_rate": 0.01, "loss": 2.03, "step": 21597 }, { "epoch": 2.2201665124884364, "grad_norm": 0.12566886842250824, "learning_rate": 0.01, "loss": 2.0527, "step": 21600 }, { "epoch": 2.2204748689485045, "grad_norm": 0.03933155536651611, "learning_rate": 0.01, "loss": 2.022, "step": 21603 }, { "epoch": 2.220783225408572, "grad_norm": 0.04617391526699066, "learning_rate": 0.01, "loss": 2.0109, "step": 21606 }, { "epoch": 2.2210915818686403, "grad_norm": 0.05472411960363388, "learning_rate": 0.01, "loss": 2.0412, "step": 21609 }, { "epoch": 2.221399938328708, "grad_norm": 0.05556654930114746, "learning_rate": 0.01, "loss": 2.0214, "step": 21612 }, { "epoch": 2.2217082947887756, "grad_norm": 0.05096900090575218, "learning_rate": 0.01, "loss": 2.0524, "step": 21615 }, { "epoch": 2.2220166512488437, "grad_norm": 0.039425577968358994, "learning_rate": 0.01, "loss": 2.0368, "step": 21618 }, { "epoch": 2.2223250077089114, "grad_norm": 0.05080854892730713, "learning_rate": 0.01, "loss": 2.0494, "step": 21621 }, { "epoch": 2.2226333641689795, "grad_norm": 0.03824865445494652, "learning_rate": 0.01, "loss": 2.0497, "step": 21624 }, { "epoch": 2.222941720629047, "grad_norm": 0.03814932331442833, "learning_rate": 0.01, "loss": 2.0388, "step": 21627 }, { "epoch": 2.223250077089115, "grad_norm": 0.057797808200120926, "learning_rate": 0.01, "loss": 2.046, "step": 21630 }, { "epoch": 2.223558433549183, "grad_norm": 0.06596177071332932, "learning_rate": 0.01, "loss": 2.0572, "step": 21633 }, { "epoch": 2.2238667900092506, "grad_norm": 0.13438032567501068, "learning_rate": 0.01, "loss": 2.04, "step": 21636 }, { "epoch": 2.2241751464693187, "grad_norm": 0.06200256571173668, "learning_rate": 0.01, "loss": 2.0563, "step": 21639 }, { "epoch": 2.2244835029293863, "grad_norm": 0.06471807509660721, "learning_rate": 0.01, "loss": 2.0264, "step": 21642 }, { "epoch": 2.224791859389454, "grad_norm": 0.06439206004142761, "learning_rate": 0.01, "loss": 2.0425, "step": 21645 }, { "epoch": 2.225100215849522, "grad_norm": 0.0768360123038292, "learning_rate": 0.01, "loss": 2.0494, "step": 21648 }, { "epoch": 2.2254085723095898, "grad_norm": 0.10393831878900528, "learning_rate": 0.01, "loss": 2.0455, "step": 21651 }, { "epoch": 2.225716928769658, "grad_norm": 0.03999519720673561, "learning_rate": 0.01, "loss": 2.0168, "step": 21654 }, { "epoch": 2.2260252852297255, "grad_norm": 0.04620358720421791, "learning_rate": 0.01, "loss": 2.0576, "step": 21657 }, { "epoch": 2.226333641689793, "grad_norm": 0.05364964157342911, "learning_rate": 0.01, "loss": 2.0409, "step": 21660 }, { "epoch": 2.2266419981498613, "grad_norm": 0.049792349338531494, "learning_rate": 0.01, "loss": 2.0315, "step": 21663 }, { "epoch": 2.226950354609929, "grad_norm": 0.0651509091258049, "learning_rate": 0.01, "loss": 2.0176, "step": 21666 }, { "epoch": 2.227258711069997, "grad_norm": 0.035688720643520355, "learning_rate": 0.01, "loss": 2.0509, "step": 21669 }, { "epoch": 2.2275670675300647, "grad_norm": 0.06412792950868607, "learning_rate": 0.01, "loss": 2.0007, "step": 21672 }, { "epoch": 2.2278754239901324, "grad_norm": 0.08088821917772293, "learning_rate": 0.01, "loss": 2.0421, "step": 21675 }, { "epoch": 2.2281837804502005, "grad_norm": 0.060282256454229355, "learning_rate": 0.01, "loss": 2.0361, "step": 21678 }, { "epoch": 2.228492136910268, "grad_norm": 0.09816791117191315, "learning_rate": 0.01, "loss": 2.0317, "step": 21681 }, { "epoch": 2.2288004933703363, "grad_norm": 0.06672241538763046, "learning_rate": 0.01, "loss": 2.0559, "step": 21684 }, { "epoch": 2.229108849830404, "grad_norm": 0.06586040556430817, "learning_rate": 0.01, "loss": 2.0375, "step": 21687 }, { "epoch": 2.2294172062904716, "grad_norm": 0.0655137374997139, "learning_rate": 0.01, "loss": 2.0189, "step": 21690 }, { "epoch": 2.2297255627505397, "grad_norm": 0.08448750525712967, "learning_rate": 0.01, "loss": 2.0292, "step": 21693 }, { "epoch": 2.2300339192106073, "grad_norm": 0.05828822776675224, "learning_rate": 0.01, "loss": 2.043, "step": 21696 }, { "epoch": 2.2303422756706754, "grad_norm": 0.0816628485918045, "learning_rate": 0.01, "loss": 2.0362, "step": 21699 }, { "epoch": 2.230650632130743, "grad_norm": 0.038368817418813705, "learning_rate": 0.01, "loss": 2.029, "step": 21702 }, { "epoch": 2.230958988590811, "grad_norm": 0.08584286272525787, "learning_rate": 0.01, "loss": 2.0183, "step": 21705 }, { "epoch": 2.231267345050879, "grad_norm": 0.08528412878513336, "learning_rate": 0.01, "loss": 2.0185, "step": 21708 }, { "epoch": 2.2315757015109465, "grad_norm": 0.07158780843019485, "learning_rate": 0.01, "loss": 2.0437, "step": 21711 }, { "epoch": 2.2318840579710146, "grad_norm": 0.07810889184474945, "learning_rate": 0.01, "loss": 2.0374, "step": 21714 }, { "epoch": 2.2321924144310823, "grad_norm": 0.07769618928432465, "learning_rate": 0.01, "loss": 2.038, "step": 21717 }, { "epoch": 2.23250077089115, "grad_norm": 0.04515406861901283, "learning_rate": 0.01, "loss": 2.0307, "step": 21720 }, { "epoch": 2.232809127351218, "grad_norm": 0.11805865168571472, "learning_rate": 0.01, "loss": 2.0203, "step": 21723 }, { "epoch": 2.2331174838112857, "grad_norm": 0.11899860948324203, "learning_rate": 0.01, "loss": 2.0184, "step": 21726 }, { "epoch": 2.233425840271354, "grad_norm": 0.03309144452214241, "learning_rate": 0.01, "loss": 2.0307, "step": 21729 }, { "epoch": 2.2337341967314215, "grad_norm": 0.03389447182416916, "learning_rate": 0.01, "loss": 2.0257, "step": 21732 }, { "epoch": 2.2340425531914896, "grad_norm": 0.03472166880965233, "learning_rate": 0.01, "loss": 2.0386, "step": 21735 }, { "epoch": 2.2343509096515572, "grad_norm": 0.11815425753593445, "learning_rate": 0.01, "loss": 2.0302, "step": 21738 }, { "epoch": 2.234659266111625, "grad_norm": 0.08007802814245224, "learning_rate": 0.01, "loss": 2.0047, "step": 21741 }, { "epoch": 2.234967622571693, "grad_norm": 0.05053863301873207, "learning_rate": 0.01, "loss": 2.0119, "step": 21744 }, { "epoch": 2.2352759790317607, "grad_norm": 0.12116878479719162, "learning_rate": 0.01, "loss": 2.0189, "step": 21747 }, { "epoch": 2.2355843354918283, "grad_norm": 0.06733332574367523, "learning_rate": 0.01, "loss": 2.0441, "step": 21750 }, { "epoch": 2.2358926919518964, "grad_norm": 0.05141659080982208, "learning_rate": 0.01, "loss": 2.0315, "step": 21753 }, { "epoch": 2.236201048411964, "grad_norm": 0.03695325180888176, "learning_rate": 0.01, "loss": 2.0249, "step": 21756 }, { "epoch": 2.236509404872032, "grad_norm": 0.033388737589120865, "learning_rate": 0.01, "loss": 2.0238, "step": 21759 }, { "epoch": 2.2368177613321, "grad_norm": 0.03913639858365059, "learning_rate": 0.01, "loss": 2.0286, "step": 21762 }, { "epoch": 2.237126117792168, "grad_norm": 0.048513270914554596, "learning_rate": 0.01, "loss": 2.0277, "step": 21765 }, { "epoch": 2.2374344742522356, "grad_norm": 0.061340250074863434, "learning_rate": 0.01, "loss": 2.0295, "step": 21768 }, { "epoch": 2.2377428307123033, "grad_norm": 0.042784787714481354, "learning_rate": 0.01, "loss": 2.0306, "step": 21771 }, { "epoch": 2.2380511871723714, "grad_norm": 0.04124082252383232, "learning_rate": 0.01, "loss": 2.0478, "step": 21774 }, { "epoch": 2.238359543632439, "grad_norm": 0.03749304264783859, "learning_rate": 0.01, "loss": 2.0067, "step": 21777 }, { "epoch": 2.2386679000925067, "grad_norm": 0.12073452770709991, "learning_rate": 0.01, "loss": 2.0382, "step": 21780 }, { "epoch": 2.238976256552575, "grad_norm": 0.06613750755786896, "learning_rate": 0.01, "loss": 2.066, "step": 21783 }, { "epoch": 2.2392846130126425, "grad_norm": 0.11562148481607437, "learning_rate": 0.01, "loss": 2.0542, "step": 21786 }, { "epoch": 2.2395929694727106, "grad_norm": 0.06355132162570953, "learning_rate": 0.01, "loss": 2.0439, "step": 21789 }, { "epoch": 2.2399013259327782, "grad_norm": 0.09228016436100006, "learning_rate": 0.01, "loss": 2.066, "step": 21792 }, { "epoch": 2.2402096823928463, "grad_norm": 0.07648111879825592, "learning_rate": 0.01, "loss": 2.0334, "step": 21795 }, { "epoch": 2.240518038852914, "grad_norm": 0.0650177150964737, "learning_rate": 0.01, "loss": 2.0415, "step": 21798 }, { "epoch": 2.2408263953129817, "grad_norm": 0.09675853699445724, "learning_rate": 0.01, "loss": 2.0483, "step": 21801 }, { "epoch": 2.2411347517730498, "grad_norm": 0.045449864119291306, "learning_rate": 0.01, "loss": 2.0415, "step": 21804 }, { "epoch": 2.2414431082331174, "grad_norm": 0.0982547327876091, "learning_rate": 0.01, "loss": 2.0369, "step": 21807 }, { "epoch": 2.241751464693185, "grad_norm": 0.060201246291399, "learning_rate": 0.01, "loss": 2.0366, "step": 21810 }, { "epoch": 2.242059821153253, "grad_norm": 0.04503628611564636, "learning_rate": 0.01, "loss": 2.0411, "step": 21813 }, { "epoch": 2.242368177613321, "grad_norm": 0.04854295030236244, "learning_rate": 0.01, "loss": 2.0485, "step": 21816 }, { "epoch": 2.242676534073389, "grad_norm": 0.12228553742170334, "learning_rate": 0.01, "loss": 2.0204, "step": 21819 }, { "epoch": 2.2429848905334566, "grad_norm": 0.10491406172513962, "learning_rate": 0.01, "loss": 2.0546, "step": 21822 }, { "epoch": 2.2432932469935247, "grad_norm": 0.1188177838921547, "learning_rate": 0.01, "loss": 2.0324, "step": 21825 }, { "epoch": 2.2436016034535924, "grad_norm": 0.10877541452646255, "learning_rate": 0.01, "loss": 2.0433, "step": 21828 }, { "epoch": 2.24390995991366, "grad_norm": 0.03375115245580673, "learning_rate": 0.01, "loss": 2.0475, "step": 21831 }, { "epoch": 2.244218316373728, "grad_norm": 0.07588639855384827, "learning_rate": 0.01, "loss": 2.0192, "step": 21834 }, { "epoch": 2.244526672833796, "grad_norm": 0.04841979220509529, "learning_rate": 0.01, "loss": 2.0324, "step": 21837 }, { "epoch": 2.2448350292938635, "grad_norm": 0.041367027908563614, "learning_rate": 0.01, "loss": 2.0436, "step": 21840 }, { "epoch": 2.2451433857539316, "grad_norm": 0.044548399746418, "learning_rate": 0.01, "loss": 2.0349, "step": 21843 }, { "epoch": 2.2454517422139992, "grad_norm": 0.049429114907979965, "learning_rate": 0.01, "loss": 2.007, "step": 21846 }, { "epoch": 2.2457600986740673, "grad_norm": 0.07232322543859482, "learning_rate": 0.01, "loss": 2.028, "step": 21849 }, { "epoch": 2.246068455134135, "grad_norm": 0.05866502597928047, "learning_rate": 0.01, "loss": 2.0255, "step": 21852 }, { "epoch": 2.246376811594203, "grad_norm": 0.08406642079353333, "learning_rate": 0.01, "loss": 2.045, "step": 21855 }, { "epoch": 2.2466851680542708, "grad_norm": 0.08692225068807602, "learning_rate": 0.01, "loss": 2.0074, "step": 21858 }, { "epoch": 2.2469935245143384, "grad_norm": 0.09182562679052353, "learning_rate": 0.01, "loss": 2.0245, "step": 21861 }, { "epoch": 2.2473018809744065, "grad_norm": 0.07580506801605225, "learning_rate": 0.01, "loss": 2.0281, "step": 21864 }, { "epoch": 2.247610237434474, "grad_norm": 0.09604424238204956, "learning_rate": 0.01, "loss": 2.054, "step": 21867 }, { "epoch": 2.2479185938945423, "grad_norm": 0.05383382737636566, "learning_rate": 0.01, "loss": 2.0316, "step": 21870 }, { "epoch": 2.24822695035461, "grad_norm": 0.035557687282562256, "learning_rate": 0.01, "loss": 2.0246, "step": 21873 }, { "epoch": 2.2485353068146776, "grad_norm": 0.0330585315823555, "learning_rate": 0.01, "loss": 2.0335, "step": 21876 }, { "epoch": 2.2488436632747457, "grad_norm": 0.07125352323055267, "learning_rate": 0.01, "loss": 2.0058, "step": 21879 }, { "epoch": 2.2491520197348134, "grad_norm": 0.07722420245409012, "learning_rate": 0.01, "loss": 2.0428, "step": 21882 }, { "epoch": 2.2494603761948815, "grad_norm": 0.05086112394928932, "learning_rate": 0.01, "loss": 2.0444, "step": 21885 }, { "epoch": 2.249768732654949, "grad_norm": 0.055818263441324234, "learning_rate": 0.01, "loss": 2.0381, "step": 21888 }, { "epoch": 2.250077089115017, "grad_norm": 0.06524482369422913, "learning_rate": 0.01, "loss": 2.0543, "step": 21891 }, { "epoch": 2.250385445575085, "grad_norm": 0.051445212215185165, "learning_rate": 0.01, "loss": 2.0594, "step": 21894 }, { "epoch": 2.2506938020351526, "grad_norm": 0.057153645902872086, "learning_rate": 0.01, "loss": 2.0113, "step": 21897 }, { "epoch": 2.2510021584952202, "grad_norm": 0.08242445439100266, "learning_rate": 0.01, "loss": 2.0465, "step": 21900 }, { "epoch": 2.2513105149552883, "grad_norm": 0.06253752112388611, "learning_rate": 0.01, "loss": 2.0405, "step": 21903 }, { "epoch": 2.251618871415356, "grad_norm": 0.05388238653540611, "learning_rate": 0.01, "loss": 2.0548, "step": 21906 }, { "epoch": 2.251927227875424, "grad_norm": 0.05570909380912781, "learning_rate": 0.01, "loss": 2.0474, "step": 21909 }, { "epoch": 2.2522355843354918, "grad_norm": 0.038132745772600174, "learning_rate": 0.01, "loss": 2.0473, "step": 21912 }, { "epoch": 2.25254394079556, "grad_norm": 0.07434429973363876, "learning_rate": 0.01, "loss": 2.0306, "step": 21915 }, { "epoch": 2.2528522972556275, "grad_norm": 0.05396199971437454, "learning_rate": 0.01, "loss": 2.0239, "step": 21918 }, { "epoch": 2.253160653715695, "grad_norm": 0.06541625410318375, "learning_rate": 0.01, "loss": 2.0513, "step": 21921 }, { "epoch": 2.2534690101757633, "grad_norm": 0.07078956067562103, "learning_rate": 0.01, "loss": 2.0333, "step": 21924 }, { "epoch": 2.253777366635831, "grad_norm": 0.06990516930818558, "learning_rate": 0.01, "loss": 2.0078, "step": 21927 }, { "epoch": 2.254085723095899, "grad_norm": 0.049635425209999084, "learning_rate": 0.01, "loss": 2.0283, "step": 21930 }, { "epoch": 2.2543940795559667, "grad_norm": 0.04051049426198006, "learning_rate": 0.01, "loss": 2.0293, "step": 21933 }, { "epoch": 2.2547024360160344, "grad_norm": 0.11082252860069275, "learning_rate": 0.01, "loss": 2.0278, "step": 21936 }, { "epoch": 2.2550107924761025, "grad_norm": 0.0768849179148674, "learning_rate": 0.01, "loss": 2.0089, "step": 21939 }, { "epoch": 2.25531914893617, "grad_norm": 0.0420587994158268, "learning_rate": 0.01, "loss": 2.0188, "step": 21942 }, { "epoch": 2.2556275053962382, "grad_norm": 0.03924650698900223, "learning_rate": 0.01, "loss": 2.035, "step": 21945 }, { "epoch": 2.255935861856306, "grad_norm": 0.058580152690410614, "learning_rate": 0.01, "loss": 2.0358, "step": 21948 }, { "epoch": 2.2562442183163736, "grad_norm": 0.050332482904195786, "learning_rate": 0.01, "loss": 2.0261, "step": 21951 }, { "epoch": 2.2565525747764417, "grad_norm": 0.0472397580742836, "learning_rate": 0.01, "loss": 2.0196, "step": 21954 }, { "epoch": 2.2568609312365093, "grad_norm": 0.0500427670776844, "learning_rate": 0.01, "loss": 2.033, "step": 21957 }, { "epoch": 2.2571692876965774, "grad_norm": 0.08899175375699997, "learning_rate": 0.01, "loss": 2.0503, "step": 21960 }, { "epoch": 2.257477644156645, "grad_norm": 0.06647983938455582, "learning_rate": 0.01, "loss": 2.0087, "step": 21963 }, { "epoch": 2.2577860006167128, "grad_norm": 0.12703396379947662, "learning_rate": 0.01, "loss": 2.0532, "step": 21966 }, { "epoch": 2.258094357076781, "grad_norm": 0.06023648753762245, "learning_rate": 0.01, "loss": 2.0346, "step": 21969 }, { "epoch": 2.2584027135368485, "grad_norm": 0.05025608092546463, "learning_rate": 0.01, "loss": 2.0431, "step": 21972 }, { "epoch": 2.2587110699969166, "grad_norm": 0.043917398899793625, "learning_rate": 0.01, "loss": 2.0184, "step": 21975 }, { "epoch": 2.2590194264569843, "grad_norm": 0.03574421629309654, "learning_rate": 0.01, "loss": 2.021, "step": 21978 }, { "epoch": 2.259327782917052, "grad_norm": 0.03546285256743431, "learning_rate": 0.01, "loss": 2.0116, "step": 21981 }, { "epoch": 2.25963613937712, "grad_norm": 0.039241861552000046, "learning_rate": 0.01, "loss": 2.0314, "step": 21984 }, { "epoch": 2.2599444958371877, "grad_norm": 0.09643664956092834, "learning_rate": 0.01, "loss": 2.0378, "step": 21987 }, { "epoch": 2.260252852297256, "grad_norm": 0.08918357640504837, "learning_rate": 0.01, "loss": 2.0377, "step": 21990 }, { "epoch": 2.2605612087573235, "grad_norm": 0.05018826946616173, "learning_rate": 0.01, "loss": 2.0183, "step": 21993 }, { "epoch": 2.260869565217391, "grad_norm": 0.05288619175553322, "learning_rate": 0.01, "loss": 2.0333, "step": 21996 }, { "epoch": 2.2611779216774592, "grad_norm": 0.053800489753484726, "learning_rate": 0.01, "loss": 2.0359, "step": 21999 }, { "epoch": 2.261486278137527, "grad_norm": 0.11081501096487045, "learning_rate": 0.01, "loss": 2.0517, "step": 22002 }, { "epoch": 2.261794634597595, "grad_norm": 0.045440319925546646, "learning_rate": 0.01, "loss": 2.0105, "step": 22005 }, { "epoch": 2.2621029910576627, "grad_norm": 0.07007669657468796, "learning_rate": 0.01, "loss": 2.0411, "step": 22008 }, { "epoch": 2.2624113475177303, "grad_norm": 0.04412767291069031, "learning_rate": 0.01, "loss": 2.0345, "step": 22011 }, { "epoch": 2.2627197039777984, "grad_norm": 0.034647636115550995, "learning_rate": 0.01, "loss": 2.0236, "step": 22014 }, { "epoch": 2.263028060437866, "grad_norm": 0.04766898602247238, "learning_rate": 0.01, "loss": 2.0214, "step": 22017 }, { "epoch": 2.263336416897934, "grad_norm": 0.0641711950302124, "learning_rate": 0.01, "loss": 2.0195, "step": 22020 }, { "epoch": 2.263644773358002, "grad_norm": 0.05316673591732979, "learning_rate": 0.01, "loss": 2.0137, "step": 22023 }, { "epoch": 2.2639531298180695, "grad_norm": 0.03488560765981674, "learning_rate": 0.01, "loss": 2.0221, "step": 22026 }, { "epoch": 2.2642614862781376, "grad_norm": 0.05415144935250282, "learning_rate": 0.01, "loss": 2.0551, "step": 22029 }, { "epoch": 2.2645698427382053, "grad_norm": 0.08091796189546585, "learning_rate": 0.01, "loss": 2.0339, "step": 22032 }, { "epoch": 2.2648781991982734, "grad_norm": 0.0850050300359726, "learning_rate": 0.01, "loss": 2.0213, "step": 22035 }, { "epoch": 2.265186555658341, "grad_norm": 0.13837046921253204, "learning_rate": 0.01, "loss": 2.0352, "step": 22038 }, { "epoch": 2.2654949121184087, "grad_norm": 0.05676966533064842, "learning_rate": 0.01, "loss": 2.0357, "step": 22041 }, { "epoch": 2.265803268578477, "grad_norm": 0.1171020120382309, "learning_rate": 0.01, "loss": 2.0048, "step": 22044 }, { "epoch": 2.2661116250385445, "grad_norm": 0.04077135771512985, "learning_rate": 0.01, "loss": 2.0386, "step": 22047 }, { "epoch": 2.2664199814986126, "grad_norm": 0.03570380434393883, "learning_rate": 0.01, "loss": 2.0046, "step": 22050 }, { "epoch": 2.2667283379586802, "grad_norm": 0.0496290847659111, "learning_rate": 0.01, "loss": 2.0439, "step": 22053 }, { "epoch": 2.267036694418748, "grad_norm": 0.06205829232931137, "learning_rate": 0.01, "loss": 2.0346, "step": 22056 }, { "epoch": 2.267345050878816, "grad_norm": 0.04645274206995964, "learning_rate": 0.01, "loss": 2.0237, "step": 22059 }, { "epoch": 2.2676534073388837, "grad_norm": 0.04779735952615738, "learning_rate": 0.01, "loss": 2.009, "step": 22062 }, { "epoch": 2.2679617637989518, "grad_norm": 0.05166240781545639, "learning_rate": 0.01, "loss": 2.0391, "step": 22065 }, { "epoch": 2.2682701202590194, "grad_norm": 0.14004342257976532, "learning_rate": 0.01, "loss": 2.0309, "step": 22068 }, { "epoch": 2.268578476719087, "grad_norm": 0.06366194784641266, "learning_rate": 0.01, "loss": 2.0306, "step": 22071 }, { "epoch": 2.268886833179155, "grad_norm": 0.04914015159010887, "learning_rate": 0.01, "loss": 2.026, "step": 22074 }, { "epoch": 2.269195189639223, "grad_norm": 0.07121274620294571, "learning_rate": 0.01, "loss": 2.0302, "step": 22077 }, { "epoch": 2.269503546099291, "grad_norm": 0.048453398048877716, "learning_rate": 0.01, "loss": 2.047, "step": 22080 }, { "epoch": 2.2698119025593586, "grad_norm": 0.03777848929166794, "learning_rate": 0.01, "loss": 2.0643, "step": 22083 }, { "epoch": 2.2701202590194263, "grad_norm": 0.04788368567824364, "learning_rate": 0.01, "loss": 2.0296, "step": 22086 }, { "epoch": 2.2704286154794944, "grad_norm": 0.036985646933317184, "learning_rate": 0.01, "loss": 2.0124, "step": 22089 }, { "epoch": 2.270736971939562, "grad_norm": 0.08654552698135376, "learning_rate": 0.01, "loss": 2.0268, "step": 22092 }, { "epoch": 2.27104532839963, "grad_norm": 0.12740878760814667, "learning_rate": 0.01, "loss": 2.0423, "step": 22095 }, { "epoch": 2.271353684859698, "grad_norm": 0.058829743415117264, "learning_rate": 0.01, "loss": 2.0141, "step": 22098 }, { "epoch": 2.2716620413197655, "grad_norm": 0.04173153638839722, "learning_rate": 0.01, "loss": 2.0288, "step": 22101 }, { "epoch": 2.2719703977798336, "grad_norm": 0.04551135376095772, "learning_rate": 0.01, "loss": 2.0348, "step": 22104 }, { "epoch": 2.272278754239901, "grad_norm": 0.04404577612876892, "learning_rate": 0.01, "loss": 2.0043, "step": 22107 }, { "epoch": 2.2725871106999693, "grad_norm": 0.033306755125522614, "learning_rate": 0.01, "loss": 2.0333, "step": 22110 }, { "epoch": 2.272895467160037, "grad_norm": 0.03450062498450279, "learning_rate": 0.01, "loss": 2.0188, "step": 22113 }, { "epoch": 2.273203823620105, "grad_norm": 0.04665246978402138, "learning_rate": 0.01, "loss": 2.0002, "step": 22116 }, { "epoch": 2.2735121800801728, "grad_norm": 0.04256023094058037, "learning_rate": 0.01, "loss": 1.9977, "step": 22119 }, { "epoch": 2.2738205365402404, "grad_norm": 0.04408838599920273, "learning_rate": 0.01, "loss": 2.047, "step": 22122 }, { "epoch": 2.2741288930003085, "grad_norm": 0.17131304740905762, "learning_rate": 0.01, "loss": 2.0225, "step": 22125 }, { "epoch": 2.274437249460376, "grad_norm": 0.12661625444889069, "learning_rate": 0.01, "loss": 2.0417, "step": 22128 }, { "epoch": 2.274745605920444, "grad_norm": 0.1100076287984848, "learning_rate": 0.01, "loss": 2.0526, "step": 22131 }, { "epoch": 2.275053962380512, "grad_norm": 0.06313984096050262, "learning_rate": 0.01, "loss": 2.0372, "step": 22134 }, { "epoch": 2.2753623188405796, "grad_norm": 0.04563833773136139, "learning_rate": 0.01, "loss": 2.0356, "step": 22137 }, { "epoch": 2.2756706753006477, "grad_norm": 0.034519702196121216, "learning_rate": 0.01, "loss": 2.0394, "step": 22140 }, { "epoch": 2.2759790317607154, "grad_norm": 0.033708199858665466, "learning_rate": 0.01, "loss": 2.0418, "step": 22143 }, { "epoch": 2.2762873882207835, "grad_norm": 0.036958202719688416, "learning_rate": 0.01, "loss": 2.005, "step": 22146 }, { "epoch": 2.276595744680851, "grad_norm": 0.052069418132305145, "learning_rate": 0.01, "loss": 2.0288, "step": 22149 }, { "epoch": 2.276904101140919, "grad_norm": 0.03941415995359421, "learning_rate": 0.01, "loss": 2.0391, "step": 22152 }, { "epoch": 2.277212457600987, "grad_norm": 0.041270628571510315, "learning_rate": 0.01, "loss": 2.0574, "step": 22155 }, { "epoch": 2.2775208140610546, "grad_norm": 0.05398832634091377, "learning_rate": 0.01, "loss": 2.044, "step": 22158 }, { "epoch": 2.277829170521122, "grad_norm": 0.048906486481428146, "learning_rate": 0.01, "loss": 2.046, "step": 22161 }, { "epoch": 2.2781375269811903, "grad_norm": 0.15073837339878082, "learning_rate": 0.01, "loss": 2.0425, "step": 22164 }, { "epoch": 2.278445883441258, "grad_norm": 0.06301066279411316, "learning_rate": 0.01, "loss": 2.0516, "step": 22167 }, { "epoch": 2.278754239901326, "grad_norm": 0.07525215297937393, "learning_rate": 0.01, "loss": 2.0344, "step": 22170 }, { "epoch": 2.2790625963613937, "grad_norm": 0.059356629848480225, "learning_rate": 0.01, "loss": 2.0373, "step": 22173 }, { "epoch": 2.279370952821462, "grad_norm": 0.04340675100684166, "learning_rate": 0.01, "loss": 2.0172, "step": 22176 }, { "epoch": 2.2796793092815295, "grad_norm": 0.052859678864479065, "learning_rate": 0.01, "loss": 2.0215, "step": 22179 }, { "epoch": 2.279987665741597, "grad_norm": 0.05917971953749657, "learning_rate": 0.01, "loss": 2.0286, "step": 22182 }, { "epoch": 2.2802960222016653, "grad_norm": 0.04111889749765396, "learning_rate": 0.01, "loss": 2.0502, "step": 22185 }, { "epoch": 2.280604378661733, "grad_norm": 0.06371071189641953, "learning_rate": 0.01, "loss": 2.0264, "step": 22188 }, { "epoch": 2.2809127351218006, "grad_norm": 0.039862968027591705, "learning_rate": 0.01, "loss": 2.0247, "step": 22191 }, { "epoch": 2.2812210915818687, "grad_norm": 0.0944151058793068, "learning_rate": 0.01, "loss": 2.0257, "step": 22194 }, { "epoch": 2.2815294480419364, "grad_norm": 0.041564084589481354, "learning_rate": 0.01, "loss": 2.0282, "step": 22197 }, { "epoch": 2.2818378045020045, "grad_norm": 0.1109161302447319, "learning_rate": 0.01, "loss": 1.9969, "step": 22200 }, { "epoch": 2.282146160962072, "grad_norm": 0.14463242888450623, "learning_rate": 0.01, "loss": 2.0455, "step": 22203 }, { "epoch": 2.2824545174221402, "grad_norm": 0.06254401057958603, "learning_rate": 0.01, "loss": 2.0269, "step": 22206 }, { "epoch": 2.282762873882208, "grad_norm": 0.03546387329697609, "learning_rate": 0.01, "loss": 2.0116, "step": 22209 }, { "epoch": 2.2830712303422755, "grad_norm": 0.041129205375909805, "learning_rate": 0.01, "loss": 2.0323, "step": 22212 }, { "epoch": 2.2833795868023437, "grad_norm": 0.048473335802555084, "learning_rate": 0.01, "loss": 2.03, "step": 22215 }, { "epoch": 2.2836879432624113, "grad_norm": 0.0633682832121849, "learning_rate": 0.01, "loss": 2.0277, "step": 22218 }, { "epoch": 2.283996299722479, "grad_norm": 0.06431914120912552, "learning_rate": 0.01, "loss": 2.0353, "step": 22221 }, { "epoch": 2.284304656182547, "grad_norm": 0.09050017595291138, "learning_rate": 0.01, "loss": 2.0305, "step": 22224 }, { "epoch": 2.2846130126426147, "grad_norm": 0.07363586872816086, "learning_rate": 0.01, "loss": 2.0421, "step": 22227 }, { "epoch": 2.284921369102683, "grad_norm": 0.07984726130962372, "learning_rate": 0.01, "loss": 2.0216, "step": 22230 }, { "epoch": 2.2852297255627505, "grad_norm": 0.0845586284995079, "learning_rate": 0.01, "loss": 2.0083, "step": 22233 }, { "epoch": 2.2855380820228186, "grad_norm": 0.13392622768878937, "learning_rate": 0.01, "loss": 2.0339, "step": 22236 }, { "epoch": 2.2858464384828863, "grad_norm": 0.06791893392801285, "learning_rate": 0.01, "loss": 2.0457, "step": 22239 }, { "epoch": 2.286154794942954, "grad_norm": 0.044396497309207916, "learning_rate": 0.01, "loss": 2.0408, "step": 22242 }, { "epoch": 2.286463151403022, "grad_norm": 0.04964762553572655, "learning_rate": 0.01, "loss": 2.0259, "step": 22245 }, { "epoch": 2.2867715078630897, "grad_norm": 0.06517814844846725, "learning_rate": 0.01, "loss": 2.0352, "step": 22248 }, { "epoch": 2.2870798643231574, "grad_norm": 0.043722327798604965, "learning_rate": 0.01, "loss": 2.0111, "step": 22251 }, { "epoch": 2.2873882207832255, "grad_norm": 0.06852597743272781, "learning_rate": 0.01, "loss": 2.0377, "step": 22254 }, { "epoch": 2.287696577243293, "grad_norm": 0.13437853753566742, "learning_rate": 0.01, "loss": 2.0376, "step": 22257 }, { "epoch": 2.288004933703361, "grad_norm": 0.03924357146024704, "learning_rate": 0.01, "loss": 2.0283, "step": 22260 }, { "epoch": 2.288313290163429, "grad_norm": 0.039702124893665314, "learning_rate": 0.01, "loss": 2.0293, "step": 22263 }, { "epoch": 2.288621646623497, "grad_norm": 0.05187009647488594, "learning_rate": 0.01, "loss": 1.9957, "step": 22266 }, { "epoch": 2.2889300030835646, "grad_norm": 0.0682847648859024, "learning_rate": 0.01, "loss": 2.0251, "step": 22269 }, { "epoch": 2.2892383595436323, "grad_norm": 0.046652939170598984, "learning_rate": 0.01, "loss": 2.0481, "step": 22272 }, { "epoch": 2.2895467160037004, "grad_norm": 0.04348958283662796, "learning_rate": 0.01, "loss": 2.0492, "step": 22275 }, { "epoch": 2.289855072463768, "grad_norm": 0.05141732096672058, "learning_rate": 0.01, "loss": 2.0372, "step": 22278 }, { "epoch": 2.2901634289238357, "grad_norm": 0.04899610951542854, "learning_rate": 0.01, "loss": 2.0381, "step": 22281 }, { "epoch": 2.290471785383904, "grad_norm": 0.05614173039793968, "learning_rate": 0.01, "loss": 2.0082, "step": 22284 }, { "epoch": 2.2907801418439715, "grad_norm": 0.08255594223737717, "learning_rate": 0.01, "loss": 2.0079, "step": 22287 }, { "epoch": 2.2910884983040396, "grad_norm": 0.12105574458837509, "learning_rate": 0.01, "loss": 2.0341, "step": 22290 }, { "epoch": 2.2913968547641073, "grad_norm": 0.07021155953407288, "learning_rate": 0.01, "loss": 2.0051, "step": 22293 }, { "epoch": 2.2917052112241754, "grad_norm": 0.08364082872867584, "learning_rate": 0.01, "loss": 2.0445, "step": 22296 }, { "epoch": 2.292013567684243, "grad_norm": 0.1123763918876648, "learning_rate": 0.01, "loss": 2.0298, "step": 22299 }, { "epoch": 2.2923219241443107, "grad_norm": 0.06539402157068253, "learning_rate": 0.01, "loss": 2.0245, "step": 22302 }, { "epoch": 2.292630280604379, "grad_norm": 0.08749070018529892, "learning_rate": 0.01, "loss": 2.0336, "step": 22305 }, { "epoch": 2.2929386370644464, "grad_norm": 0.05580052733421326, "learning_rate": 0.01, "loss": 2.0519, "step": 22308 }, { "epoch": 2.293246993524514, "grad_norm": 0.07866258919239044, "learning_rate": 0.01, "loss": 2.0455, "step": 22311 }, { "epoch": 2.293555349984582, "grad_norm": 0.06253038346767426, "learning_rate": 0.01, "loss": 2.0423, "step": 22314 }, { "epoch": 2.29386370644465, "grad_norm": 0.0412861630320549, "learning_rate": 0.01, "loss": 2.0504, "step": 22317 }, { "epoch": 2.294172062904718, "grad_norm": 0.05568886548280716, "learning_rate": 0.01, "loss": 2.0397, "step": 22320 }, { "epoch": 2.2944804193647856, "grad_norm": 0.060274332761764526, "learning_rate": 0.01, "loss": 2.0563, "step": 22323 }, { "epoch": 2.2947887758248537, "grad_norm": 0.09785215556621552, "learning_rate": 0.01, "loss": 2.0378, "step": 22326 }, { "epoch": 2.2950971322849214, "grad_norm": 0.06171462684869766, "learning_rate": 0.01, "loss": 2.0246, "step": 22329 }, { "epoch": 2.295405488744989, "grad_norm": 0.1034122183918953, "learning_rate": 0.01, "loss": 2.0493, "step": 22332 }, { "epoch": 2.295713845205057, "grad_norm": 0.04855991527438164, "learning_rate": 0.01, "loss": 2.0401, "step": 22335 }, { "epoch": 2.296022201665125, "grad_norm": 0.05000189319252968, "learning_rate": 0.01, "loss": 2.0111, "step": 22338 }, { "epoch": 2.2963305581251925, "grad_norm": 0.031126966699957848, "learning_rate": 0.01, "loss": 2.0248, "step": 22341 }, { "epoch": 2.2966389145852606, "grad_norm": 0.08688051998615265, "learning_rate": 0.01, "loss": 2.0227, "step": 22344 }, { "epoch": 2.2969472710453283, "grad_norm": 0.08918049931526184, "learning_rate": 0.01, "loss": 2.0573, "step": 22347 }, { "epoch": 2.2972556275053964, "grad_norm": 0.0592593289911747, "learning_rate": 0.01, "loss": 2.0042, "step": 22350 }, { "epoch": 2.297563983965464, "grad_norm": 0.11501101404428482, "learning_rate": 0.01, "loss": 2.0348, "step": 22353 }, { "epoch": 2.297872340425532, "grad_norm": 0.12966668605804443, "learning_rate": 0.01, "loss": 2.0475, "step": 22356 }, { "epoch": 2.2981806968856, "grad_norm": 0.08889560401439667, "learning_rate": 0.01, "loss": 2.0499, "step": 22359 }, { "epoch": 2.2984890533456674, "grad_norm": 0.07578025758266449, "learning_rate": 0.01, "loss": 2.0479, "step": 22362 }, { "epoch": 2.2987974098057355, "grad_norm": 0.04283175244927406, "learning_rate": 0.01, "loss": 2.0009, "step": 22365 }, { "epoch": 2.299105766265803, "grad_norm": 0.038169119507074356, "learning_rate": 0.01, "loss": 2.0506, "step": 22368 }, { "epoch": 2.299414122725871, "grad_norm": 0.07047520577907562, "learning_rate": 0.01, "loss": 2.0435, "step": 22371 }, { "epoch": 2.299722479185939, "grad_norm": 0.06367038190364838, "learning_rate": 0.01, "loss": 2.0186, "step": 22374 }, { "epoch": 2.3000308356460066, "grad_norm": 0.06601911783218384, "learning_rate": 0.01, "loss": 2.026, "step": 22377 }, { "epoch": 2.3003391921060747, "grad_norm": 0.06333800405263901, "learning_rate": 0.01, "loss": 2.0244, "step": 22380 }, { "epoch": 2.3006475485661424, "grad_norm": 0.0635596215724945, "learning_rate": 0.01, "loss": 2.0102, "step": 22383 }, { "epoch": 2.3009559050262105, "grad_norm": 0.09258962422609329, "learning_rate": 0.01, "loss": 2.0348, "step": 22386 }, { "epoch": 2.301264261486278, "grad_norm": 0.04462733119726181, "learning_rate": 0.01, "loss": 1.9775, "step": 22389 }, { "epoch": 2.301572617946346, "grad_norm": 0.04802479222416878, "learning_rate": 0.01, "loss": 2.0557, "step": 22392 }, { "epoch": 2.301880974406414, "grad_norm": 0.0396190769970417, "learning_rate": 0.01, "loss": 2.0543, "step": 22395 }, { "epoch": 2.3021893308664816, "grad_norm": 0.05610959604382515, "learning_rate": 0.01, "loss": 2.048, "step": 22398 }, { "epoch": 2.3024976873265492, "grad_norm": 0.03454664349555969, "learning_rate": 0.01, "loss": 2.0592, "step": 22401 }, { "epoch": 2.3028060437866174, "grad_norm": 0.049920883029699326, "learning_rate": 0.01, "loss": 2.0533, "step": 22404 }, { "epoch": 2.303114400246685, "grad_norm": 0.04548259451985359, "learning_rate": 0.01, "loss": 2.045, "step": 22407 }, { "epoch": 2.303422756706753, "grad_norm": 0.07624956220388412, "learning_rate": 0.01, "loss": 2.0321, "step": 22410 }, { "epoch": 2.3037311131668208, "grad_norm": 0.1172046959400177, "learning_rate": 0.01, "loss": 2.0285, "step": 22413 }, { "epoch": 2.304039469626889, "grad_norm": 0.07652290165424347, "learning_rate": 0.01, "loss": 2.0324, "step": 22416 }, { "epoch": 2.3043478260869565, "grad_norm": 0.07458405196666718, "learning_rate": 0.01, "loss": 2.0351, "step": 22419 }, { "epoch": 2.304656182547024, "grad_norm": 0.08384053409099579, "learning_rate": 0.01, "loss": 2.0457, "step": 22422 }, { "epoch": 2.3049645390070923, "grad_norm": 0.0492565892636776, "learning_rate": 0.01, "loss": 2.0353, "step": 22425 }, { "epoch": 2.30527289546716, "grad_norm": 0.05125366151332855, "learning_rate": 0.01, "loss": 2.0447, "step": 22428 }, { "epoch": 2.3055812519272276, "grad_norm": 0.03920417279005051, "learning_rate": 0.01, "loss": 2.0471, "step": 22431 }, { "epoch": 2.3058896083872957, "grad_norm": 0.07206401228904724, "learning_rate": 0.01, "loss": 2.0312, "step": 22434 }, { "epoch": 2.3061979648473634, "grad_norm": 0.08888402581214905, "learning_rate": 0.01, "loss": 2.0331, "step": 22437 }, { "epoch": 2.3065063213074315, "grad_norm": 0.1788371056318283, "learning_rate": 0.01, "loss": 2.0461, "step": 22440 }, { "epoch": 2.306814677767499, "grad_norm": 0.11030272394418716, "learning_rate": 0.01, "loss": 2.0314, "step": 22443 }, { "epoch": 2.3071230342275673, "grad_norm": 0.08602694422006607, "learning_rate": 0.01, "loss": 2.0101, "step": 22446 }, { "epoch": 2.307431390687635, "grad_norm": 0.07269123196601868, "learning_rate": 0.01, "loss": 2.0068, "step": 22449 }, { "epoch": 2.3077397471477026, "grad_norm": 0.043196290731430054, "learning_rate": 0.01, "loss": 2.0617, "step": 22452 }, { "epoch": 2.3080481036077707, "grad_norm": 0.03932726010680199, "learning_rate": 0.01, "loss": 2.0314, "step": 22455 }, { "epoch": 2.3083564600678383, "grad_norm": 0.07751597464084625, "learning_rate": 0.01, "loss": 2.0225, "step": 22458 }, { "epoch": 2.3086648165279064, "grad_norm": 0.06549305468797684, "learning_rate": 0.01, "loss": 2.0386, "step": 22461 }, { "epoch": 2.308973172987974, "grad_norm": 0.05819348245859146, "learning_rate": 0.01, "loss": 2.0343, "step": 22464 }, { "epoch": 2.3092815294480418, "grad_norm": 0.04251622408628464, "learning_rate": 0.01, "loss": 2.02, "step": 22467 }, { "epoch": 2.30958988590811, "grad_norm": 0.03997926414012909, "learning_rate": 0.01, "loss": 2.0405, "step": 22470 }, { "epoch": 2.3098982423681775, "grad_norm": 0.060847315937280655, "learning_rate": 0.01, "loss": 2.0301, "step": 22473 }, { "epoch": 2.3102065988282456, "grad_norm": 0.188557431101799, "learning_rate": 0.01, "loss": 2.0388, "step": 22476 }, { "epoch": 2.3105149552883133, "grad_norm": 0.050608254969120026, "learning_rate": 0.01, "loss": 2.0376, "step": 22479 }, { "epoch": 2.310823311748381, "grad_norm": 0.0930873304605484, "learning_rate": 0.01, "loss": 2.0329, "step": 22482 }, { "epoch": 2.311131668208449, "grad_norm": 0.05231544002890587, "learning_rate": 0.01, "loss": 2.0646, "step": 22485 }, { "epoch": 2.3114400246685167, "grad_norm": 0.038996320217847824, "learning_rate": 0.01, "loss": 2.0198, "step": 22488 }, { "epoch": 2.311748381128585, "grad_norm": 0.04804287478327751, "learning_rate": 0.01, "loss": 2.0377, "step": 22491 }, { "epoch": 2.3120567375886525, "grad_norm": 0.05822224169969559, "learning_rate": 0.01, "loss": 2.0581, "step": 22494 }, { "epoch": 2.31236509404872, "grad_norm": 0.04677027836441994, "learning_rate": 0.01, "loss": 2.0552, "step": 22497 }, { "epoch": 2.3126734505087883, "grad_norm": 0.05270523950457573, "learning_rate": 0.01, "loss": 1.9882, "step": 22500 }, { "epoch": 2.312981806968856, "grad_norm": 0.04068690538406372, "learning_rate": 0.01, "loss": 2.0495, "step": 22503 }, { "epoch": 2.313290163428924, "grad_norm": 0.03488868847489357, "learning_rate": 0.01, "loss": 2.0382, "step": 22506 }, { "epoch": 2.3135985198889917, "grad_norm": 0.1007746085524559, "learning_rate": 0.01, "loss": 2.0356, "step": 22509 }, { "epoch": 2.3139068763490593, "grad_norm": 0.04351113364100456, "learning_rate": 0.01, "loss": 2.0513, "step": 22512 }, { "epoch": 2.3142152328091274, "grad_norm": 0.042396917939186096, "learning_rate": 0.01, "loss": 2.0427, "step": 22515 }, { "epoch": 2.314523589269195, "grad_norm": 0.05541342496871948, "learning_rate": 0.01, "loss": 2.0577, "step": 22518 }, { "epoch": 2.314831945729263, "grad_norm": 0.05093314126133919, "learning_rate": 0.01, "loss": 2.0127, "step": 22521 }, { "epoch": 2.315140302189331, "grad_norm": 0.09227500855922699, "learning_rate": 0.01, "loss": 2.0303, "step": 22524 }, { "epoch": 2.3154486586493985, "grad_norm": 0.07242649048566818, "learning_rate": 0.01, "loss": 2.0355, "step": 22527 }, { "epoch": 2.3157570151094666, "grad_norm": 0.07468149811029434, "learning_rate": 0.01, "loss": 2.0443, "step": 22530 }, { "epoch": 2.3160653715695343, "grad_norm": 0.0861506536602974, "learning_rate": 0.01, "loss": 2.0461, "step": 22533 }, { "epoch": 2.3163737280296024, "grad_norm": 0.05024004355072975, "learning_rate": 0.01, "loss": 2.0228, "step": 22536 }, { "epoch": 2.31668208448967, "grad_norm": 0.07255478948354721, "learning_rate": 0.01, "loss": 1.997, "step": 22539 }, { "epoch": 2.3169904409497377, "grad_norm": 0.08199785649776459, "learning_rate": 0.01, "loss": 2.0465, "step": 22542 }, { "epoch": 2.317298797409806, "grad_norm": 0.05194063484668732, "learning_rate": 0.01, "loss": 2.0426, "step": 22545 }, { "epoch": 2.3176071538698735, "grad_norm": 0.05785224214196205, "learning_rate": 0.01, "loss": 2.0311, "step": 22548 }, { "epoch": 2.3179155103299416, "grad_norm": 0.0767458900809288, "learning_rate": 0.01, "loss": 2.0338, "step": 22551 }, { "epoch": 2.3182238667900092, "grad_norm": 0.04905517399311066, "learning_rate": 0.01, "loss": 2.0369, "step": 22554 }, { "epoch": 2.318532223250077, "grad_norm": 0.12444967031478882, "learning_rate": 0.01, "loss": 1.9988, "step": 22557 }, { "epoch": 2.318840579710145, "grad_norm": 0.07965657114982605, "learning_rate": 0.01, "loss": 2.0438, "step": 22560 }, { "epoch": 2.3191489361702127, "grad_norm": 0.12167848646640778, "learning_rate": 0.01, "loss": 2.0359, "step": 22563 }, { "epoch": 2.3194572926302808, "grad_norm": 0.05483706668019295, "learning_rate": 0.01, "loss": 2.034, "step": 22566 }, { "epoch": 2.3197656490903484, "grad_norm": 0.043046776205301285, "learning_rate": 0.01, "loss": 2.0132, "step": 22569 }, { "epoch": 2.320074005550416, "grad_norm": 0.03646457940340042, "learning_rate": 0.01, "loss": 1.9867, "step": 22572 }, { "epoch": 2.320382362010484, "grad_norm": 0.05777902901172638, "learning_rate": 0.01, "loss": 2.0397, "step": 22575 }, { "epoch": 2.320690718470552, "grad_norm": 0.08646814525127411, "learning_rate": 0.01, "loss": 2.0294, "step": 22578 }, { "epoch": 2.32099907493062, "grad_norm": 0.10770904272794724, "learning_rate": 0.01, "loss": 2.0168, "step": 22581 }, { "epoch": 2.3213074313906876, "grad_norm": 0.11546061187982559, "learning_rate": 0.01, "loss": 2.0281, "step": 22584 }, { "epoch": 2.3216157878507553, "grad_norm": 0.08060076832771301, "learning_rate": 0.01, "loss": 2.0378, "step": 22587 }, { "epoch": 2.3219241443108234, "grad_norm": 0.05546190217137337, "learning_rate": 0.01, "loss": 2.0361, "step": 22590 }, { "epoch": 2.322232500770891, "grad_norm": 0.06310277432203293, "learning_rate": 0.01, "loss": 2.0261, "step": 22593 }, { "epoch": 2.322540857230959, "grad_norm": 0.037245072424411774, "learning_rate": 0.01, "loss": 2.038, "step": 22596 }, { "epoch": 2.322849213691027, "grad_norm": 0.05082730948925018, "learning_rate": 0.01, "loss": 2.0327, "step": 22599 }, { "epoch": 2.3231575701510945, "grad_norm": 0.054748065769672394, "learning_rate": 0.01, "loss": 2.0356, "step": 22602 }, { "epoch": 2.3234659266111626, "grad_norm": 0.06093902140855789, "learning_rate": 0.01, "loss": 2.0339, "step": 22605 }, { "epoch": 2.3237742830712302, "grad_norm": 0.10238637775182724, "learning_rate": 0.01, "loss": 2.0559, "step": 22608 }, { "epoch": 2.3240826395312983, "grad_norm": 0.0850609764456749, "learning_rate": 0.01, "loss": 2.0337, "step": 22611 }, { "epoch": 2.324390995991366, "grad_norm": 0.04108014330267906, "learning_rate": 0.01, "loss": 2.045, "step": 22614 }, { "epoch": 2.3246993524514337, "grad_norm": 0.03669281303882599, "learning_rate": 0.01, "loss": 2.0276, "step": 22617 }, { "epoch": 2.3250077089115018, "grad_norm": 0.053524449467659, "learning_rate": 0.01, "loss": 2.0076, "step": 22620 }, { "epoch": 2.3253160653715694, "grad_norm": 0.03545799478888512, "learning_rate": 0.01, "loss": 2.03, "step": 22623 }, { "epoch": 2.3256244218316375, "grad_norm": 0.0569877028465271, "learning_rate": 0.01, "loss": 2.0189, "step": 22626 }, { "epoch": 2.325932778291705, "grad_norm": 0.06943153589963913, "learning_rate": 0.01, "loss": 2.0183, "step": 22629 }, { "epoch": 2.326241134751773, "grad_norm": 0.07389956712722778, "learning_rate": 0.01, "loss": 2.0524, "step": 22632 }, { "epoch": 2.326549491211841, "grad_norm": 0.08397506922483444, "learning_rate": 0.01, "loss": 2.0214, "step": 22635 }, { "epoch": 2.3268578476719086, "grad_norm": 0.13598600029945374, "learning_rate": 0.01, "loss": 2.0276, "step": 22638 }, { "epoch": 2.3271662041319767, "grad_norm": 0.08953214436769485, "learning_rate": 0.01, "loss": 2.0282, "step": 22641 }, { "epoch": 2.3274745605920444, "grad_norm": 0.09812797605991364, "learning_rate": 0.01, "loss": 2.0631, "step": 22644 }, { "epoch": 2.3277829170521125, "grad_norm": 0.06696080416440964, "learning_rate": 0.01, "loss": 2.0314, "step": 22647 }, { "epoch": 2.32809127351218, "grad_norm": 0.07981070876121521, "learning_rate": 0.01, "loss": 2.0209, "step": 22650 }, { "epoch": 2.328399629972248, "grad_norm": 0.06655466556549072, "learning_rate": 0.01, "loss": 2.0426, "step": 22653 }, { "epoch": 2.328707986432316, "grad_norm": 0.061447955667972565, "learning_rate": 0.01, "loss": 2.0349, "step": 22656 }, { "epoch": 2.3290163428923836, "grad_norm": 0.08812141418457031, "learning_rate": 0.01, "loss": 2.0471, "step": 22659 }, { "epoch": 2.3293246993524512, "grad_norm": 0.04082084447145462, "learning_rate": 0.01, "loss": 2.0435, "step": 22662 }, { "epoch": 2.3296330558125193, "grad_norm": 0.04827709123492241, "learning_rate": 0.01, "loss": 2.0344, "step": 22665 }, { "epoch": 2.329941412272587, "grad_norm": 0.04531668871641159, "learning_rate": 0.01, "loss": 2.01, "step": 22668 }, { "epoch": 2.330249768732655, "grad_norm": 0.06739286333322525, "learning_rate": 0.01, "loss": 2.0479, "step": 22671 }, { "epoch": 2.3305581251927228, "grad_norm": 0.07753113657236099, "learning_rate": 0.01, "loss": 2.0454, "step": 22674 }, { "epoch": 2.330866481652791, "grad_norm": 0.1572093814611435, "learning_rate": 0.01, "loss": 2.0089, "step": 22677 }, { "epoch": 2.3311748381128585, "grad_norm": 0.0898297056555748, "learning_rate": 0.01, "loss": 2.0434, "step": 22680 }, { "epoch": 2.331483194572926, "grad_norm": 0.0803251639008522, "learning_rate": 0.01, "loss": 2.0421, "step": 22683 }, { "epoch": 2.3317915510329943, "grad_norm": 0.06155823543667793, "learning_rate": 0.01, "loss": 2.0106, "step": 22686 }, { "epoch": 2.332099907493062, "grad_norm": 0.03681538626551628, "learning_rate": 0.01, "loss": 2.0082, "step": 22689 }, { "epoch": 2.3324082639531296, "grad_norm": 0.03595598414540291, "learning_rate": 0.01, "loss": 2.0563, "step": 22692 }, { "epoch": 2.3327166204131977, "grad_norm": 0.06950604170560837, "learning_rate": 0.01, "loss": 2.0413, "step": 22695 }, { "epoch": 2.3330249768732654, "grad_norm": 0.07386364042758942, "learning_rate": 0.01, "loss": 2.0261, "step": 22698 }, { "epoch": 2.3333333333333335, "grad_norm": 0.06711214780807495, "learning_rate": 0.01, "loss": 2.0418, "step": 22701 }, { "epoch": 2.333641689793401, "grad_norm": 0.07357903569936752, "learning_rate": 0.01, "loss": 2.0271, "step": 22704 }, { "epoch": 2.3339500462534692, "grad_norm": 0.0660550519824028, "learning_rate": 0.01, "loss": 2.0242, "step": 22707 }, { "epoch": 2.334258402713537, "grad_norm": 0.05066053569316864, "learning_rate": 0.01, "loss": 2.0117, "step": 22710 }, { "epoch": 2.3345667591736046, "grad_norm": 0.04992471635341644, "learning_rate": 0.01, "loss": 2.0764, "step": 22713 }, { "epoch": 2.3348751156336727, "grad_norm": 0.03477726876735687, "learning_rate": 0.01, "loss": 2.0239, "step": 22716 }, { "epoch": 2.3351834720937403, "grad_norm": 0.05454208329319954, "learning_rate": 0.01, "loss": 2.0476, "step": 22719 }, { "epoch": 2.335491828553808, "grad_norm": 0.09009736031293869, "learning_rate": 0.01, "loss": 2.0026, "step": 22722 }, { "epoch": 2.335800185013876, "grad_norm": 0.10271045565605164, "learning_rate": 0.01, "loss": 2.0514, "step": 22725 }, { "epoch": 2.3361085414739438, "grad_norm": 0.05939007177948952, "learning_rate": 0.01, "loss": 2.0436, "step": 22728 }, { "epoch": 2.336416897934012, "grad_norm": 0.06454264372587204, "learning_rate": 0.01, "loss": 2.0334, "step": 22731 }, { "epoch": 2.3367252543940795, "grad_norm": 0.04892132803797722, "learning_rate": 0.01, "loss": 2.0305, "step": 22734 }, { "epoch": 2.3370336108541476, "grad_norm": 0.060863394290208817, "learning_rate": 0.01, "loss": 2.0365, "step": 22737 }, { "epoch": 2.3373419673142153, "grad_norm": 0.06073556840419769, "learning_rate": 0.01, "loss": 2.028, "step": 22740 }, { "epoch": 2.337650323774283, "grad_norm": 0.04620682820677757, "learning_rate": 0.01, "loss": 2.034, "step": 22743 }, { "epoch": 2.337958680234351, "grad_norm": 0.04834865778684616, "learning_rate": 0.01, "loss": 2.0407, "step": 22746 }, { "epoch": 2.3382670366944187, "grad_norm": 0.08154502511024475, "learning_rate": 0.01, "loss": 2.0477, "step": 22749 }, { "epoch": 2.3385753931544864, "grad_norm": 0.062378283590078354, "learning_rate": 0.01, "loss": 2.0256, "step": 22752 }, { "epoch": 2.3388837496145545, "grad_norm": 0.11560855805873871, "learning_rate": 0.01, "loss": 2.0059, "step": 22755 }, { "epoch": 2.339192106074622, "grad_norm": 0.14084307849407196, "learning_rate": 0.01, "loss": 2.0179, "step": 22758 }, { "epoch": 2.3395004625346902, "grad_norm": 0.08354218304157257, "learning_rate": 0.01, "loss": 2.0344, "step": 22761 }, { "epoch": 2.339808818994758, "grad_norm": 0.04660021886229515, "learning_rate": 0.01, "loss": 2.0225, "step": 22764 }, { "epoch": 2.340117175454826, "grad_norm": 0.06319580972194672, "learning_rate": 0.01, "loss": 2.0438, "step": 22767 }, { "epoch": 2.3404255319148937, "grad_norm": 0.06721585988998413, "learning_rate": 0.01, "loss": 2.0112, "step": 22770 }, { "epoch": 2.3407338883749613, "grad_norm": 0.08593994379043579, "learning_rate": 0.01, "loss": 2.0149, "step": 22773 }, { "epoch": 2.3410422448350294, "grad_norm": 0.07594014704227448, "learning_rate": 0.01, "loss": 2.045, "step": 22776 }, { "epoch": 2.341350601295097, "grad_norm": 0.05942634493112564, "learning_rate": 0.01, "loss": 2.0557, "step": 22779 }, { "epoch": 2.3416589577551647, "grad_norm": 0.09516235440969467, "learning_rate": 0.01, "loss": 2.0524, "step": 22782 }, { "epoch": 2.341967314215233, "grad_norm": 0.11480774730443954, "learning_rate": 0.01, "loss": 2.0112, "step": 22785 }, { "epoch": 2.3422756706753005, "grad_norm": 0.078018918633461, "learning_rate": 0.01, "loss": 2.0083, "step": 22788 }, { "epoch": 2.3425840271353686, "grad_norm": 0.04268670827150345, "learning_rate": 0.01, "loss": 1.9939, "step": 22791 }, { "epoch": 2.3428923835954363, "grad_norm": 0.04221808537840843, "learning_rate": 0.01, "loss": 2.0198, "step": 22794 }, { "epoch": 2.3432007400555044, "grad_norm": 0.04617379978299141, "learning_rate": 0.01, "loss": 2.036, "step": 22797 }, { "epoch": 2.343509096515572, "grad_norm": 0.05600395053625107, "learning_rate": 0.01, "loss": 2.0289, "step": 22800 }, { "epoch": 2.3438174529756397, "grad_norm": 0.04997321218252182, "learning_rate": 0.01, "loss": 2.0396, "step": 22803 }, { "epoch": 2.344125809435708, "grad_norm": 0.08891329169273376, "learning_rate": 0.01, "loss": 2.0464, "step": 22806 }, { "epoch": 2.3444341658957755, "grad_norm": 0.06883488595485687, "learning_rate": 0.01, "loss": 2.0417, "step": 22809 }, { "epoch": 2.344742522355843, "grad_norm": 0.05532229691743851, "learning_rate": 0.01, "loss": 2.0367, "step": 22812 }, { "epoch": 2.3450508788159112, "grad_norm": 0.04480346292257309, "learning_rate": 0.01, "loss": 2.036, "step": 22815 }, { "epoch": 2.345359235275979, "grad_norm": 0.06533756107091904, "learning_rate": 0.01, "loss": 2.0218, "step": 22818 }, { "epoch": 2.345667591736047, "grad_norm": 0.09851729869842529, "learning_rate": 0.01, "loss": 2.0175, "step": 22821 }, { "epoch": 2.3459759481961147, "grad_norm": 0.06026868894696236, "learning_rate": 0.01, "loss": 2.0359, "step": 22824 }, { "epoch": 2.3462843046561828, "grad_norm": 0.10084753483533859, "learning_rate": 0.01, "loss": 2.0348, "step": 22827 }, { "epoch": 2.3465926611162504, "grad_norm": 0.09302054345607758, "learning_rate": 0.01, "loss": 2.0376, "step": 22830 }, { "epoch": 2.346901017576318, "grad_norm": 0.06304951757192612, "learning_rate": 0.01, "loss": 2.0254, "step": 22833 }, { "epoch": 2.347209374036386, "grad_norm": 0.03625926375389099, "learning_rate": 0.01, "loss": 2.037, "step": 22836 }, { "epoch": 2.347517730496454, "grad_norm": 0.0602838359773159, "learning_rate": 0.01, "loss": 2.0258, "step": 22839 }, { "epoch": 2.3478260869565215, "grad_norm": 0.08285371959209442, "learning_rate": 0.01, "loss": 2.0299, "step": 22842 }, { "epoch": 2.3481344434165896, "grad_norm": 0.05780784785747528, "learning_rate": 0.01, "loss": 2.0288, "step": 22845 }, { "epoch": 2.3484427998766573, "grad_norm": 0.07491873949766159, "learning_rate": 0.01, "loss": 2.0327, "step": 22848 }, { "epoch": 2.3487511563367254, "grad_norm": 0.04745258018374443, "learning_rate": 0.01, "loss": 2.0347, "step": 22851 }, { "epoch": 2.349059512796793, "grad_norm": 0.030006757006049156, "learning_rate": 0.01, "loss": 2.0315, "step": 22854 }, { "epoch": 2.349367869256861, "grad_norm": 0.035286273807287216, "learning_rate": 0.01, "loss": 2.0198, "step": 22857 }, { "epoch": 2.349676225716929, "grad_norm": 0.08970153331756592, "learning_rate": 0.01, "loss": 2.0473, "step": 22860 }, { "epoch": 2.3499845821769965, "grad_norm": 0.06819456070661545, "learning_rate": 0.01, "loss": 2.0246, "step": 22863 }, { "epoch": 2.3502929386370646, "grad_norm": 0.07130144536495209, "learning_rate": 0.01, "loss": 2.019, "step": 22866 }, { "epoch": 2.3506012950971322, "grad_norm": 0.046634428203105927, "learning_rate": 0.01, "loss": 2.0338, "step": 22869 }, { "epoch": 2.3509096515572, "grad_norm": 0.10333798825740814, "learning_rate": 0.01, "loss": 2.0348, "step": 22872 }, { "epoch": 2.351218008017268, "grad_norm": 0.06691887974739075, "learning_rate": 0.01, "loss": 2.0254, "step": 22875 }, { "epoch": 2.3515263644773357, "grad_norm": 0.06780122220516205, "learning_rate": 0.01, "loss": 2.052, "step": 22878 }, { "epoch": 2.3518347209374038, "grad_norm": 0.04448529705405235, "learning_rate": 0.01, "loss": 2.0322, "step": 22881 }, { "epoch": 2.3521430773974714, "grad_norm": 0.09176638722419739, "learning_rate": 0.01, "loss": 2.0168, "step": 22884 }, { "epoch": 2.3524514338575395, "grad_norm": 0.05699750408530235, "learning_rate": 0.01, "loss": 2.0241, "step": 22887 }, { "epoch": 2.352759790317607, "grad_norm": 0.0556836873292923, "learning_rate": 0.01, "loss": 2.0328, "step": 22890 }, { "epoch": 2.353068146777675, "grad_norm": 0.05335136875510216, "learning_rate": 0.01, "loss": 2.0072, "step": 22893 }, { "epoch": 2.353376503237743, "grad_norm": 0.06900188326835632, "learning_rate": 0.01, "loss": 2.0392, "step": 22896 }, { "epoch": 2.3536848596978106, "grad_norm": 0.07300775498151779, "learning_rate": 0.01, "loss": 2.0341, "step": 22899 }, { "epoch": 2.3539932161578783, "grad_norm": 0.0912926122546196, "learning_rate": 0.01, "loss": 2.041, "step": 22902 }, { "epoch": 2.3543015726179464, "grad_norm": 0.0803481861948967, "learning_rate": 0.01, "loss": 2.0225, "step": 22905 }, { "epoch": 2.354609929078014, "grad_norm": 0.05067334324121475, "learning_rate": 0.01, "loss": 2.0187, "step": 22908 }, { "epoch": 2.354918285538082, "grad_norm": 0.10282410681247711, "learning_rate": 0.01, "loss": 2.0349, "step": 22911 }, { "epoch": 2.35522664199815, "grad_norm": 0.04224551469087601, "learning_rate": 0.01, "loss": 2.0166, "step": 22914 }, { "epoch": 2.355534998458218, "grad_norm": 0.06768529862165451, "learning_rate": 0.01, "loss": 2.0304, "step": 22917 }, { "epoch": 2.3558433549182856, "grad_norm": 0.10372413694858551, "learning_rate": 0.01, "loss": 2.0421, "step": 22920 }, { "epoch": 2.356151711378353, "grad_norm": 0.05974859744310379, "learning_rate": 0.01, "loss": 2.0231, "step": 22923 }, { "epoch": 2.3564600678384213, "grad_norm": 0.06408294290304184, "learning_rate": 0.01, "loss": 2.0199, "step": 22926 }, { "epoch": 2.356768424298489, "grad_norm": 0.053037382662296295, "learning_rate": 0.01, "loss": 2.0286, "step": 22929 }, { "epoch": 2.3570767807585566, "grad_norm": 0.038871798664331436, "learning_rate": 0.01, "loss": 2.0588, "step": 22932 }, { "epoch": 2.3573851372186247, "grad_norm": 0.046300627291202545, "learning_rate": 0.01, "loss": 2.033, "step": 22935 }, { "epoch": 2.3576934936786924, "grad_norm": 0.05344367399811745, "learning_rate": 0.01, "loss": 2.0462, "step": 22938 }, { "epoch": 2.3580018501387605, "grad_norm": 0.0878051146864891, "learning_rate": 0.01, "loss": 2.0173, "step": 22941 }, { "epoch": 2.358310206598828, "grad_norm": 0.09469857066869736, "learning_rate": 0.01, "loss": 2.0373, "step": 22944 }, { "epoch": 2.3586185630588963, "grad_norm": 0.04746483266353607, "learning_rate": 0.01, "loss": 2.0155, "step": 22947 }, { "epoch": 2.358926919518964, "grad_norm": 0.051294367760419846, "learning_rate": 0.01, "loss": 2.0452, "step": 22950 }, { "epoch": 2.3592352759790316, "grad_norm": 0.04378626495599747, "learning_rate": 0.01, "loss": 2.0381, "step": 22953 }, { "epoch": 2.3595436324390997, "grad_norm": 0.054627690464258194, "learning_rate": 0.01, "loss": 2.0592, "step": 22956 }, { "epoch": 2.3598519888991674, "grad_norm": 0.04732866585254669, "learning_rate": 0.01, "loss": 2.0284, "step": 22959 }, { "epoch": 2.3601603453592355, "grad_norm": 0.11569249629974365, "learning_rate": 0.01, "loss": 2.007, "step": 22962 }, { "epoch": 2.360468701819303, "grad_norm": 0.10606824606657028, "learning_rate": 0.01, "loss": 2.0357, "step": 22965 }, { "epoch": 2.360777058279371, "grad_norm": 0.05072540044784546, "learning_rate": 0.01, "loss": 2.0581, "step": 22968 }, { "epoch": 2.361085414739439, "grad_norm": 0.0738227441906929, "learning_rate": 0.01, "loss": 2.0305, "step": 22971 }, { "epoch": 2.3613937711995066, "grad_norm": 0.06440632790327072, "learning_rate": 0.01, "loss": 2.0315, "step": 22974 }, { "epoch": 2.3617021276595747, "grad_norm": 0.05426304414868355, "learning_rate": 0.01, "loss": 2.043, "step": 22977 }, { "epoch": 2.3620104841196423, "grad_norm": 0.06414210051298141, "learning_rate": 0.01, "loss": 2.0735, "step": 22980 }, { "epoch": 2.36231884057971, "grad_norm": 0.034726858139038086, "learning_rate": 0.01, "loss": 2.0422, "step": 22983 }, { "epoch": 2.362627197039778, "grad_norm": 0.04326315596699715, "learning_rate": 0.01, "loss": 2.0422, "step": 22986 }, { "epoch": 2.3629355534998457, "grad_norm": 0.06470071524381638, "learning_rate": 0.01, "loss": 2.0192, "step": 22989 }, { "epoch": 2.363243909959914, "grad_norm": 0.09161586314439774, "learning_rate": 0.01, "loss": 2.02, "step": 22992 }, { "epoch": 2.3635522664199815, "grad_norm": 0.035171300172805786, "learning_rate": 0.01, "loss": 2.0413, "step": 22995 }, { "epoch": 2.363860622880049, "grad_norm": 0.059095609933137894, "learning_rate": 0.01, "loss": 2.0237, "step": 22998 }, { "epoch": 2.3641689793401173, "grad_norm": 0.094328872859478, "learning_rate": 0.01, "loss": 2.0362, "step": 23001 }, { "epoch": 2.364477335800185, "grad_norm": 0.059930335730314255, "learning_rate": 0.01, "loss": 2.0274, "step": 23004 }, { "epoch": 2.364785692260253, "grad_norm": 0.08969483524560928, "learning_rate": 0.01, "loss": 2.04, "step": 23007 }, { "epoch": 2.3650940487203207, "grad_norm": 0.054753273725509644, "learning_rate": 0.01, "loss": 2.0195, "step": 23010 }, { "epoch": 2.3654024051803884, "grad_norm": 0.09794525057077408, "learning_rate": 0.01, "loss": 2.0385, "step": 23013 }, { "epoch": 2.3657107616404565, "grad_norm": 0.07700082659721375, "learning_rate": 0.01, "loss": 2.0483, "step": 23016 }, { "epoch": 2.366019118100524, "grad_norm": 0.03871360793709755, "learning_rate": 0.01, "loss": 2.0255, "step": 23019 }, { "epoch": 2.3663274745605922, "grad_norm": 0.08822629600763321, "learning_rate": 0.01, "loss": 2.0296, "step": 23022 }, { "epoch": 2.36663583102066, "grad_norm": 0.08018068969249725, "learning_rate": 0.01, "loss": 2.0405, "step": 23025 }, { "epoch": 2.3669441874807275, "grad_norm": 0.046322405338287354, "learning_rate": 0.01, "loss": 2.0345, "step": 23028 }, { "epoch": 2.3672525439407957, "grad_norm": 0.08056683838367462, "learning_rate": 0.01, "loss": 2.035, "step": 23031 }, { "epoch": 2.3675609004008633, "grad_norm": 0.059787072241306305, "learning_rate": 0.01, "loss": 2.0163, "step": 23034 }, { "epoch": 2.3678692568609314, "grad_norm": 0.07793419063091278, "learning_rate": 0.01, "loss": 2.0039, "step": 23037 }, { "epoch": 2.368177613320999, "grad_norm": 0.1337706595659256, "learning_rate": 0.01, "loss": 2.0138, "step": 23040 }, { "epoch": 2.3684859697810667, "grad_norm": 0.0608229860663414, "learning_rate": 0.01, "loss": 2.0064, "step": 23043 }, { "epoch": 2.368794326241135, "grad_norm": 0.07523812353610992, "learning_rate": 0.01, "loss": 2.0243, "step": 23046 }, { "epoch": 2.3691026827012025, "grad_norm": 0.05788956955075264, "learning_rate": 0.01, "loss": 2.0354, "step": 23049 }, { "epoch": 2.3694110391612706, "grad_norm": 0.05372630059719086, "learning_rate": 0.01, "loss": 2.0188, "step": 23052 }, { "epoch": 2.3697193956213383, "grad_norm": 0.0775051936507225, "learning_rate": 0.01, "loss": 2.0018, "step": 23055 }, { "epoch": 2.370027752081406, "grad_norm": 0.042055148631334305, "learning_rate": 0.01, "loss": 2.0152, "step": 23058 }, { "epoch": 2.370336108541474, "grad_norm": 0.03500501438975334, "learning_rate": 0.01, "loss": 2.0111, "step": 23061 }, { "epoch": 2.3706444650015417, "grad_norm": 0.05405488237738609, "learning_rate": 0.01, "loss": 2.0569, "step": 23064 }, { "epoch": 2.37095282146161, "grad_norm": 0.04199660196900368, "learning_rate": 0.01, "loss": 2.024, "step": 23067 }, { "epoch": 2.3712611779216775, "grad_norm": 0.10155687481164932, "learning_rate": 0.01, "loss": 2.0335, "step": 23070 }, { "epoch": 2.371569534381745, "grad_norm": 0.04267631843686104, "learning_rate": 0.01, "loss": 2.0313, "step": 23073 }, { "epoch": 2.371877890841813, "grad_norm": 0.08961319178342819, "learning_rate": 0.01, "loss": 2.0308, "step": 23076 }, { "epoch": 2.372186247301881, "grad_norm": 0.06389858573675156, "learning_rate": 0.01, "loss": 2.02, "step": 23079 }, { "epoch": 2.372494603761949, "grad_norm": 0.0733552873134613, "learning_rate": 0.01, "loss": 2.0304, "step": 23082 }, { "epoch": 2.3728029602220166, "grad_norm": 0.04577986150979996, "learning_rate": 0.01, "loss": 2.0137, "step": 23085 }, { "epoch": 2.3731113166820843, "grad_norm": 0.07929688692092896, "learning_rate": 0.01, "loss": 2.0357, "step": 23088 }, { "epoch": 2.3734196731421524, "grad_norm": 0.07446785271167755, "learning_rate": 0.01, "loss": 2.0058, "step": 23091 }, { "epoch": 2.37372802960222, "grad_norm": 0.08491658419370651, "learning_rate": 0.01, "loss": 2.0372, "step": 23094 }, { "epoch": 2.374036386062288, "grad_norm": 0.04434317350387573, "learning_rate": 0.01, "loss": 2.022, "step": 23097 }, { "epoch": 2.374344742522356, "grad_norm": 0.031715601682662964, "learning_rate": 0.01, "loss": 2.0436, "step": 23100 }, { "epoch": 2.3746530989824235, "grad_norm": 0.05073494091629982, "learning_rate": 0.01, "loss": 2.0205, "step": 23103 }, { "epoch": 2.3749614554424916, "grad_norm": 0.0484505333006382, "learning_rate": 0.01, "loss": 2.0388, "step": 23106 }, { "epoch": 2.3752698119025593, "grad_norm": 0.036467526108026505, "learning_rate": 0.01, "loss": 2.0136, "step": 23109 }, { "epoch": 2.3755781683626274, "grad_norm": 0.048451874405145645, "learning_rate": 0.01, "loss": 2.014, "step": 23112 }, { "epoch": 2.375886524822695, "grad_norm": 0.048714105039834976, "learning_rate": 0.01, "loss": 2.0546, "step": 23115 }, { "epoch": 2.3761948812827627, "grad_norm": 0.07499600946903229, "learning_rate": 0.01, "loss": 2.024, "step": 23118 }, { "epoch": 2.376503237742831, "grad_norm": 0.052757177501916885, "learning_rate": 0.01, "loss": 2.07, "step": 23121 }, { "epoch": 2.3768115942028984, "grad_norm": 0.062441807240247726, "learning_rate": 0.01, "loss": 2.0156, "step": 23124 }, { "epoch": 2.3771199506629666, "grad_norm": 0.04984834045171738, "learning_rate": 0.01, "loss": 2.0046, "step": 23127 }, { "epoch": 2.377428307123034, "grad_norm": 0.12042737007141113, "learning_rate": 0.01, "loss": 2.0043, "step": 23130 }, { "epoch": 2.377736663583102, "grad_norm": 0.08114577829837799, "learning_rate": 0.01, "loss": 2.0212, "step": 23133 }, { "epoch": 2.37804502004317, "grad_norm": 0.044427551329135895, "learning_rate": 0.01, "loss": 2.0127, "step": 23136 }, { "epoch": 2.3783533765032376, "grad_norm": 0.04814080893993378, "learning_rate": 0.01, "loss": 2.0237, "step": 23139 }, { "epoch": 2.3786617329633057, "grad_norm": 0.03783731907606125, "learning_rate": 0.01, "loss": 2.0293, "step": 23142 }, { "epoch": 2.3789700894233734, "grad_norm": 0.03743833675980568, "learning_rate": 0.01, "loss": 2.0446, "step": 23145 }, { "epoch": 2.3792784458834415, "grad_norm": 0.05341466888785362, "learning_rate": 0.01, "loss": 2.0071, "step": 23148 }, { "epoch": 2.379586802343509, "grad_norm": 0.04965018108487129, "learning_rate": 0.01, "loss": 2.0257, "step": 23151 }, { "epoch": 2.379895158803577, "grad_norm": 0.06330037117004395, "learning_rate": 0.01, "loss": 2.0466, "step": 23154 }, { "epoch": 2.380203515263645, "grad_norm": 0.04018980264663696, "learning_rate": 0.01, "loss": 2.045, "step": 23157 }, { "epoch": 2.3805118717237126, "grad_norm": 0.04469529166817665, "learning_rate": 0.01, "loss": 2.0437, "step": 23160 }, { "epoch": 2.3808202281837803, "grad_norm": 0.07242292910814285, "learning_rate": 0.01, "loss": 2.0199, "step": 23163 }, { "epoch": 2.3811285846438484, "grad_norm": 0.05165792256593704, "learning_rate": 0.01, "loss": 2.0459, "step": 23166 }, { "epoch": 2.381436941103916, "grad_norm": 0.12728899717330933, "learning_rate": 0.01, "loss": 2.0086, "step": 23169 }, { "epoch": 2.381745297563984, "grad_norm": 0.07357775419950485, "learning_rate": 0.01, "loss": 2.0436, "step": 23172 }, { "epoch": 2.382053654024052, "grad_norm": 0.05125182494521141, "learning_rate": 0.01, "loss": 2.0269, "step": 23175 }, { "epoch": 2.38236201048412, "grad_norm": 0.061880383640527725, "learning_rate": 0.01, "loss": 2.0355, "step": 23178 }, { "epoch": 2.3826703669441875, "grad_norm": 0.03774267062544823, "learning_rate": 0.01, "loss": 2.0205, "step": 23181 }, { "epoch": 2.382978723404255, "grad_norm": 0.04750019684433937, "learning_rate": 0.01, "loss": 2.046, "step": 23184 }, { "epoch": 2.3832870798643233, "grad_norm": 0.06727109849452972, "learning_rate": 0.01, "loss": 2.0545, "step": 23187 }, { "epoch": 2.383595436324391, "grad_norm": 0.08848878741264343, "learning_rate": 0.01, "loss": 2.0458, "step": 23190 }, { "epoch": 2.3839037927844586, "grad_norm": 0.07577743381261826, "learning_rate": 0.01, "loss": 2.0502, "step": 23193 }, { "epoch": 2.3842121492445267, "grad_norm": 0.07908739149570465, "learning_rate": 0.01, "loss": 2.0516, "step": 23196 }, { "epoch": 2.3845205057045944, "grad_norm": 0.08100943267345428, "learning_rate": 0.01, "loss": 2.036, "step": 23199 }, { "epoch": 2.3848288621646625, "grad_norm": 0.12388890236616135, "learning_rate": 0.01, "loss": 2.0506, "step": 23202 }, { "epoch": 2.38513721862473, "grad_norm": 0.0538008026778698, "learning_rate": 0.01, "loss": 2.0236, "step": 23205 }, { "epoch": 2.3854455750847983, "grad_norm": 0.04448952525854111, "learning_rate": 0.01, "loss": 2.0354, "step": 23208 }, { "epoch": 2.385753931544866, "grad_norm": 0.038850218057632446, "learning_rate": 0.01, "loss": 2.0368, "step": 23211 }, { "epoch": 2.3860622880049336, "grad_norm": 0.10436736047267914, "learning_rate": 0.01, "loss": 2.0316, "step": 23214 }, { "epoch": 2.3863706444650017, "grad_norm": 0.03787916898727417, "learning_rate": 0.01, "loss": 2.0358, "step": 23217 }, { "epoch": 2.3866790009250693, "grad_norm": 0.0874573364853859, "learning_rate": 0.01, "loss": 2.0306, "step": 23220 }, { "epoch": 2.386987357385137, "grad_norm": 0.08322203904390335, "learning_rate": 0.01, "loss": 2.0495, "step": 23223 }, { "epoch": 2.387295713845205, "grad_norm": 0.034374579787254333, "learning_rate": 0.01, "loss": 2.0047, "step": 23226 }, { "epoch": 2.3876040703052728, "grad_norm": 0.03772469237446785, "learning_rate": 0.01, "loss": 2.0309, "step": 23229 }, { "epoch": 2.387912426765341, "grad_norm": 0.12290430814027786, "learning_rate": 0.01, "loss": 2.0343, "step": 23232 }, { "epoch": 2.3882207832254085, "grad_norm": 0.04306014999747276, "learning_rate": 0.01, "loss": 2.0248, "step": 23235 }, { "epoch": 2.3885291396854766, "grad_norm": 0.10940956324338913, "learning_rate": 0.01, "loss": 2.0322, "step": 23238 }, { "epoch": 2.3888374961455443, "grad_norm": 0.06950829923152924, "learning_rate": 0.01, "loss": 2.0168, "step": 23241 }, { "epoch": 2.389145852605612, "grad_norm": 0.11061616986989975, "learning_rate": 0.01, "loss": 2.017, "step": 23244 }, { "epoch": 2.38945420906568, "grad_norm": 0.051813628524541855, "learning_rate": 0.01, "loss": 2.0657, "step": 23247 }, { "epoch": 2.3897625655257477, "grad_norm": 0.10411377251148224, "learning_rate": 0.01, "loss": 2.0044, "step": 23250 }, { "epoch": 2.3900709219858154, "grad_norm": 0.03951719403266907, "learning_rate": 0.01, "loss": 2.0135, "step": 23253 }, { "epoch": 2.3903792784458835, "grad_norm": 0.08920100331306458, "learning_rate": 0.01, "loss": 2.0523, "step": 23256 }, { "epoch": 2.390687634905951, "grad_norm": 0.03843710198998451, "learning_rate": 0.01, "loss": 2.0387, "step": 23259 }, { "epoch": 2.3909959913660193, "grad_norm": 0.09093856811523438, "learning_rate": 0.01, "loss": 2.0477, "step": 23262 }, { "epoch": 2.391304347826087, "grad_norm": 0.06239760294556618, "learning_rate": 0.01, "loss": 2.0407, "step": 23265 }, { "epoch": 2.391612704286155, "grad_norm": 0.07422836124897003, "learning_rate": 0.01, "loss": 2.0232, "step": 23268 }, { "epoch": 2.3919210607462227, "grad_norm": 0.04418382793664932, "learning_rate": 0.01, "loss": 2.0346, "step": 23271 }, { "epoch": 2.3922294172062903, "grad_norm": 0.08631134033203125, "learning_rate": 0.01, "loss": 2.0675, "step": 23274 }, { "epoch": 2.3925377736663584, "grad_norm": 0.05164894834160805, "learning_rate": 0.01, "loss": 2.0328, "step": 23277 }, { "epoch": 2.392846130126426, "grad_norm": 0.056628335267305374, "learning_rate": 0.01, "loss": 2.0462, "step": 23280 }, { "epoch": 2.3931544865864938, "grad_norm": 0.10568714141845703, "learning_rate": 0.01, "loss": 2.0295, "step": 23283 }, { "epoch": 2.393462843046562, "grad_norm": 0.1235639825463295, "learning_rate": 0.01, "loss": 2.0149, "step": 23286 }, { "epoch": 2.3937711995066295, "grad_norm": 0.04766182228922844, "learning_rate": 0.01, "loss": 2.016, "step": 23289 }, { "epoch": 2.3940795559666976, "grad_norm": 0.040086500346660614, "learning_rate": 0.01, "loss": 2.0011, "step": 23292 }, { "epoch": 2.3943879124267653, "grad_norm": 0.04474460333585739, "learning_rate": 0.01, "loss": 2.003, "step": 23295 }, { "epoch": 2.3946962688868334, "grad_norm": 0.03349655494093895, "learning_rate": 0.01, "loss": 2.0251, "step": 23298 }, { "epoch": 2.395004625346901, "grad_norm": 0.04361939802765846, "learning_rate": 0.01, "loss": 2.0105, "step": 23301 }, { "epoch": 2.3953129818069687, "grad_norm": 0.08834217488765717, "learning_rate": 0.01, "loss": 2.0455, "step": 23304 }, { "epoch": 2.395621338267037, "grad_norm": 0.09799984842538834, "learning_rate": 0.01, "loss": 2.0153, "step": 23307 }, { "epoch": 2.3959296947271045, "grad_norm": 0.06085311621427536, "learning_rate": 0.01, "loss": 2.0326, "step": 23310 }, { "epoch": 2.396238051187172, "grad_norm": 0.04466985911130905, "learning_rate": 0.01, "loss": 2.0122, "step": 23313 }, { "epoch": 2.3965464076472403, "grad_norm": 0.05248266085982323, "learning_rate": 0.01, "loss": 2.0388, "step": 23316 }, { "epoch": 2.396854764107308, "grad_norm": 0.06024044379591942, "learning_rate": 0.01, "loss": 2.0232, "step": 23319 }, { "epoch": 2.397163120567376, "grad_norm": 0.05061257630586624, "learning_rate": 0.01, "loss": 2.0259, "step": 23322 }, { "epoch": 2.3974714770274437, "grad_norm": 0.04136064276099205, "learning_rate": 0.01, "loss": 2.0356, "step": 23325 }, { "epoch": 2.397779833487512, "grad_norm": 0.048327211290597916, "learning_rate": 0.01, "loss": 2.0158, "step": 23328 }, { "epoch": 2.3980881899475794, "grad_norm": 0.038865648210048676, "learning_rate": 0.01, "loss": 2.0312, "step": 23331 }, { "epoch": 2.398396546407647, "grad_norm": 0.04522191360592842, "learning_rate": 0.01, "loss": 2.0125, "step": 23334 }, { "epoch": 2.398704902867715, "grad_norm": 0.033488884568214417, "learning_rate": 0.01, "loss": 2.0496, "step": 23337 }, { "epoch": 2.399013259327783, "grad_norm": 0.05207303538918495, "learning_rate": 0.01, "loss": 2.0254, "step": 23340 }, { "epoch": 2.3993216157878505, "grad_norm": 0.09892462193965912, "learning_rate": 0.01, "loss": 2.0294, "step": 23343 }, { "epoch": 2.3996299722479186, "grad_norm": 0.07379446923732758, "learning_rate": 0.01, "loss": 2.0351, "step": 23346 }, { "epoch": 2.3999383287079863, "grad_norm": 0.07604874670505524, "learning_rate": 0.01, "loss": 2.0348, "step": 23349 }, { "epoch": 2.4002466851680544, "grad_norm": 0.05876392871141434, "learning_rate": 0.01, "loss": 2.0442, "step": 23352 }, { "epoch": 2.400555041628122, "grad_norm": 0.06497185677289963, "learning_rate": 0.01, "loss": 2.0289, "step": 23355 }, { "epoch": 2.40086339808819, "grad_norm": 0.059574466198682785, "learning_rate": 0.01, "loss": 2.0056, "step": 23358 }, { "epoch": 2.401171754548258, "grad_norm": 0.03427527844905853, "learning_rate": 0.01, "loss": 2.0115, "step": 23361 }, { "epoch": 2.4014801110083255, "grad_norm": 0.04248304292559624, "learning_rate": 0.01, "loss": 2.022, "step": 23364 }, { "epoch": 2.4017884674683936, "grad_norm": 0.11010278761386871, "learning_rate": 0.01, "loss": 2.0378, "step": 23367 }, { "epoch": 2.4020968239284612, "grad_norm": 0.03992384672164917, "learning_rate": 0.01, "loss": 2.024, "step": 23370 }, { "epoch": 2.402405180388529, "grad_norm": 0.0774572566151619, "learning_rate": 0.01, "loss": 2.047, "step": 23373 }, { "epoch": 2.402713536848597, "grad_norm": 0.0753302052617073, "learning_rate": 0.01, "loss": 2.0182, "step": 23376 }, { "epoch": 2.4030218933086647, "grad_norm": 0.06872344017028809, "learning_rate": 0.01, "loss": 2.041, "step": 23379 }, { "epoch": 2.4033302497687328, "grad_norm": 0.03949934244155884, "learning_rate": 0.01, "loss": 2.0444, "step": 23382 }, { "epoch": 2.4036386062288004, "grad_norm": 0.06230955943465233, "learning_rate": 0.01, "loss": 2.0168, "step": 23385 }, { "epoch": 2.4039469626888685, "grad_norm": 0.0821446031332016, "learning_rate": 0.01, "loss": 2.0091, "step": 23388 }, { "epoch": 2.404255319148936, "grad_norm": 0.0392254963517189, "learning_rate": 0.01, "loss": 2.0083, "step": 23391 }, { "epoch": 2.404563675609004, "grad_norm": 0.04208219051361084, "learning_rate": 0.01, "loss": 2.008, "step": 23394 }, { "epoch": 2.404872032069072, "grad_norm": 0.053683795034885406, "learning_rate": 0.01, "loss": 2.0175, "step": 23397 }, { "epoch": 2.4051803885291396, "grad_norm": 0.0707748755812645, "learning_rate": 0.01, "loss": 2.0447, "step": 23400 }, { "epoch": 2.4054887449892073, "grad_norm": 0.04077059030532837, "learning_rate": 0.01, "loss": 2.0247, "step": 23403 }, { "epoch": 2.4057971014492754, "grad_norm": 0.041255395859479904, "learning_rate": 0.01, "loss": 2.0542, "step": 23406 }, { "epoch": 2.406105457909343, "grad_norm": 0.04560267925262451, "learning_rate": 0.01, "loss": 2.0134, "step": 23409 }, { "epoch": 2.406413814369411, "grad_norm": 0.1152443066239357, "learning_rate": 0.01, "loss": 2.0367, "step": 23412 }, { "epoch": 2.406722170829479, "grad_norm": 0.06491648405790329, "learning_rate": 0.01, "loss": 2.046, "step": 23415 }, { "epoch": 2.407030527289547, "grad_norm": 0.10551664233207703, "learning_rate": 0.01, "loss": 2.0176, "step": 23418 }, { "epoch": 2.4073388837496146, "grad_norm": 0.05565710365772247, "learning_rate": 0.01, "loss": 2.051, "step": 23421 }, { "epoch": 2.4076472402096822, "grad_norm": 0.07831353694200516, "learning_rate": 0.01, "loss": 2.0311, "step": 23424 }, { "epoch": 2.4079555966697503, "grad_norm": 0.04822281748056412, "learning_rate": 0.01, "loss": 2.0086, "step": 23427 }, { "epoch": 2.408263953129818, "grad_norm": 0.10326611250638962, "learning_rate": 0.01, "loss": 2.0079, "step": 23430 }, { "epoch": 2.4085723095898857, "grad_norm": 0.03122270293533802, "learning_rate": 0.01, "loss": 2.0371, "step": 23433 }, { "epoch": 2.4088806660499538, "grad_norm": 0.04365687072277069, "learning_rate": 0.01, "loss": 2.0323, "step": 23436 }, { "epoch": 2.4091890225100214, "grad_norm": 0.055597368627786636, "learning_rate": 0.01, "loss": 2.0345, "step": 23439 }, { "epoch": 2.4094973789700895, "grad_norm": 0.05635388568043709, "learning_rate": 0.01, "loss": 2.0255, "step": 23442 }, { "epoch": 2.409805735430157, "grad_norm": 0.059105440974235535, "learning_rate": 0.01, "loss": 2.0366, "step": 23445 }, { "epoch": 2.4101140918902253, "grad_norm": 0.03262312710285187, "learning_rate": 0.01, "loss": 2.0371, "step": 23448 }, { "epoch": 2.410422448350293, "grad_norm": 0.12139608711004257, "learning_rate": 0.01, "loss": 2.0423, "step": 23451 }, { "epoch": 2.4107308048103606, "grad_norm": 0.0331861712038517, "learning_rate": 0.01, "loss": 2.0242, "step": 23454 }, { "epoch": 2.4110391612704287, "grad_norm": 0.14580830931663513, "learning_rate": 0.01, "loss": 2.0268, "step": 23457 }, { "epoch": 2.4113475177304964, "grad_norm": 0.08361393213272095, "learning_rate": 0.01, "loss": 2.0357, "step": 23460 }, { "epoch": 2.4116558741905645, "grad_norm": 0.04604450613260269, "learning_rate": 0.01, "loss": 2.0218, "step": 23463 }, { "epoch": 2.411964230650632, "grad_norm": 0.0660676434636116, "learning_rate": 0.01, "loss": 2.0051, "step": 23466 }, { "epoch": 2.4122725871107, "grad_norm": 0.03829890862107277, "learning_rate": 0.01, "loss": 2.0037, "step": 23469 }, { "epoch": 2.412580943570768, "grad_norm": 0.03486289829015732, "learning_rate": 0.01, "loss": 2.0157, "step": 23472 }, { "epoch": 2.4128893000308356, "grad_norm": 0.043325070291757584, "learning_rate": 0.01, "loss": 2.0228, "step": 23475 }, { "epoch": 2.4131976564909037, "grad_norm": 0.062313832342624664, "learning_rate": 0.01, "loss": 2.0278, "step": 23478 }, { "epoch": 2.4135060129509713, "grad_norm": 0.07333476096391678, "learning_rate": 0.01, "loss": 2.031, "step": 23481 }, { "epoch": 2.413814369411039, "grad_norm": 0.05855254456400871, "learning_rate": 0.01, "loss": 2.0019, "step": 23484 }, { "epoch": 2.414122725871107, "grad_norm": 0.07818280905485153, "learning_rate": 0.01, "loss": 2.0305, "step": 23487 }, { "epoch": 2.4144310823311748, "grad_norm": 0.17364929616451263, "learning_rate": 0.01, "loss": 2.0358, "step": 23490 }, { "epoch": 2.414739438791243, "grad_norm": 0.04748675599694252, "learning_rate": 0.01, "loss": 2.0441, "step": 23493 }, { "epoch": 2.4150477952513105, "grad_norm": 0.04472067952156067, "learning_rate": 0.01, "loss": 2.001, "step": 23496 }, { "epoch": 2.415356151711378, "grad_norm": 0.04624779149889946, "learning_rate": 0.01, "loss": 2.0606, "step": 23499 }, { "epoch": 2.4156645081714463, "grad_norm": 0.04642438516020775, "learning_rate": 0.01, "loss": 2.0387, "step": 23502 }, { "epoch": 2.415972864631514, "grad_norm": 0.03872397541999817, "learning_rate": 0.01, "loss": 2.0463, "step": 23505 }, { "epoch": 2.416281221091582, "grad_norm": 0.055659957230091095, "learning_rate": 0.01, "loss": 2.0076, "step": 23508 }, { "epoch": 2.4165895775516497, "grad_norm": 0.05187131464481354, "learning_rate": 0.01, "loss": 2.0252, "step": 23511 }, { "epoch": 2.4168979340117174, "grad_norm": 0.04747389629483223, "learning_rate": 0.01, "loss": 1.9933, "step": 23514 }, { "epoch": 2.4172062904717855, "grad_norm": 0.04594513028860092, "learning_rate": 0.01, "loss": 2.0489, "step": 23517 }, { "epoch": 2.417514646931853, "grad_norm": 0.08976871520280838, "learning_rate": 0.01, "loss": 2.0728, "step": 23520 }, { "epoch": 2.4178230033919212, "grad_norm": 0.09563881158828735, "learning_rate": 0.01, "loss": 2.0399, "step": 23523 }, { "epoch": 2.418131359851989, "grad_norm": 0.05488200485706329, "learning_rate": 0.01, "loss": 2.0314, "step": 23526 }, { "epoch": 2.4184397163120566, "grad_norm": 0.10557366907596588, "learning_rate": 0.01, "loss": 2.0141, "step": 23529 }, { "epoch": 2.4187480727721247, "grad_norm": 0.06399507075548172, "learning_rate": 0.01, "loss": 2.032, "step": 23532 }, { "epoch": 2.4190564292321923, "grad_norm": 0.06832768023014069, "learning_rate": 0.01, "loss": 2.0445, "step": 23535 }, { "epoch": 2.4193647856922604, "grad_norm": 0.04164440929889679, "learning_rate": 0.01, "loss": 2.046, "step": 23538 }, { "epoch": 2.419673142152328, "grad_norm": 0.03020538203418255, "learning_rate": 0.01, "loss": 2.0368, "step": 23541 }, { "epoch": 2.4199814986123958, "grad_norm": 0.05985027551651001, "learning_rate": 0.01, "loss": 2.0166, "step": 23544 }, { "epoch": 2.420289855072464, "grad_norm": 0.08126893639564514, "learning_rate": 0.01, "loss": 2.0334, "step": 23547 }, { "epoch": 2.4205982115325315, "grad_norm": 0.03897944465279579, "learning_rate": 0.01, "loss": 2.0475, "step": 23550 }, { "epoch": 2.4209065679925996, "grad_norm": 0.05052472651004791, "learning_rate": 0.01, "loss": 2.0536, "step": 23553 }, { "epoch": 2.4212149244526673, "grad_norm": 0.03434956073760986, "learning_rate": 0.01, "loss": 2.0379, "step": 23556 }, { "epoch": 2.421523280912735, "grad_norm": 0.0911344438791275, "learning_rate": 0.01, "loss": 2.0412, "step": 23559 }, { "epoch": 2.421831637372803, "grad_norm": 0.11097840219736099, "learning_rate": 0.01, "loss": 1.9977, "step": 23562 }, { "epoch": 2.4221399938328707, "grad_norm": 0.09742028266191483, "learning_rate": 0.01, "loss": 2.0188, "step": 23565 }, { "epoch": 2.422448350292939, "grad_norm": 0.04184804484248161, "learning_rate": 0.01, "loss": 2.0143, "step": 23568 }, { "epoch": 2.4227567067530065, "grad_norm": 0.04188203811645508, "learning_rate": 0.01, "loss": 2.0039, "step": 23571 }, { "epoch": 2.423065063213074, "grad_norm": 0.05326924845576286, "learning_rate": 0.01, "loss": 2.0269, "step": 23574 }, { "epoch": 2.4233734196731422, "grad_norm": 0.04598800092935562, "learning_rate": 0.01, "loss": 2.0272, "step": 23577 }, { "epoch": 2.42368177613321, "grad_norm": 0.1110750213265419, "learning_rate": 0.01, "loss": 2.0178, "step": 23580 }, { "epoch": 2.423990132593278, "grad_norm": 0.0473935566842556, "learning_rate": 0.01, "loss": 2.0289, "step": 23583 }, { "epoch": 2.4242984890533457, "grad_norm": 0.08599594235420227, "learning_rate": 0.01, "loss": 2.0039, "step": 23586 }, { "epoch": 2.4246068455134133, "grad_norm": 0.03763740509748459, "learning_rate": 0.01, "loss": 2.0332, "step": 23589 }, { "epoch": 2.4249152019734814, "grad_norm": 0.056663963943719864, "learning_rate": 0.01, "loss": 2.0284, "step": 23592 }, { "epoch": 2.425223558433549, "grad_norm": 0.09664086252450943, "learning_rate": 0.01, "loss": 2.0424, "step": 23595 }, { "epoch": 2.425531914893617, "grad_norm": 0.047283969819545746, "learning_rate": 0.01, "loss": 2.0179, "step": 23598 }, { "epoch": 2.425840271353685, "grad_norm": 0.03726861625909805, "learning_rate": 0.01, "loss": 2.0154, "step": 23601 }, { "epoch": 2.4261486278137525, "grad_norm": 0.0514906644821167, "learning_rate": 0.01, "loss": 2.0229, "step": 23604 }, { "epoch": 2.4264569842738206, "grad_norm": 0.11972562968730927, "learning_rate": 0.01, "loss": 2.07, "step": 23607 }, { "epoch": 2.4267653407338883, "grad_norm": 0.17023397982120514, "learning_rate": 0.01, "loss": 2.0478, "step": 23610 }, { "epoch": 2.4270736971939564, "grad_norm": 0.1355310082435608, "learning_rate": 0.01, "loss": 2.0356, "step": 23613 }, { "epoch": 2.427382053654024, "grad_norm": 0.039905257523059845, "learning_rate": 0.01, "loss": 2.0305, "step": 23616 }, { "epoch": 2.4276904101140917, "grad_norm": 0.0339365154504776, "learning_rate": 0.01, "loss": 2.0328, "step": 23619 }, { "epoch": 2.42799876657416, "grad_norm": 0.03365493193268776, "learning_rate": 0.01, "loss": 2.0267, "step": 23622 }, { "epoch": 2.4283071230342275, "grad_norm": 0.041596464812755585, "learning_rate": 0.01, "loss": 2.0078, "step": 23625 }, { "epoch": 2.4286154794942956, "grad_norm": 0.0537833645939827, "learning_rate": 0.01, "loss": 2.0444, "step": 23628 }, { "epoch": 2.4289238359543632, "grad_norm": 0.049658019095659256, "learning_rate": 0.01, "loss": 2.0165, "step": 23631 }, { "epoch": 2.429232192414431, "grad_norm": 0.03983505442738533, "learning_rate": 0.01, "loss": 2.0223, "step": 23634 }, { "epoch": 2.429540548874499, "grad_norm": 0.0780608057975769, "learning_rate": 0.01, "loss": 2.0011, "step": 23637 }, { "epoch": 2.4298489053345667, "grad_norm": 0.13898798823356628, "learning_rate": 0.01, "loss": 2.0424, "step": 23640 }, { "epoch": 2.4301572617946348, "grad_norm": 0.07781122624874115, "learning_rate": 0.01, "loss": 2.0499, "step": 23643 }, { "epoch": 2.4304656182547024, "grad_norm": 0.045428015291690826, "learning_rate": 0.01, "loss": 2.0188, "step": 23646 }, { "epoch": 2.43077397471477, "grad_norm": 0.03437395393848419, "learning_rate": 0.01, "loss": 2.038, "step": 23649 }, { "epoch": 2.431082331174838, "grad_norm": 0.06030704453587532, "learning_rate": 0.01, "loss": 2.0563, "step": 23652 }, { "epoch": 2.431390687634906, "grad_norm": 0.046194083988666534, "learning_rate": 0.01, "loss": 2.0324, "step": 23655 }, { "epoch": 2.431699044094974, "grad_norm": 0.10041481256484985, "learning_rate": 0.01, "loss": 2.0184, "step": 23658 }, { "epoch": 2.4320074005550416, "grad_norm": 0.07109228521585464, "learning_rate": 0.01, "loss": 2.002, "step": 23661 }, { "epoch": 2.4323157570151093, "grad_norm": 0.03319769352674484, "learning_rate": 0.01, "loss": 2.0172, "step": 23664 }, { "epoch": 2.4326241134751774, "grad_norm": 0.047379735857248306, "learning_rate": 0.01, "loss": 2.0495, "step": 23667 }, { "epoch": 2.432932469935245, "grad_norm": 0.04836783930659294, "learning_rate": 0.01, "loss": 2.045, "step": 23670 }, { "epoch": 2.433240826395313, "grad_norm": 0.07264747470617294, "learning_rate": 0.01, "loss": 2.0147, "step": 23673 }, { "epoch": 2.433549182855381, "grad_norm": 0.09331446141004562, "learning_rate": 0.01, "loss": 2.0259, "step": 23676 }, { "epoch": 2.433857539315449, "grad_norm": 0.07618245482444763, "learning_rate": 0.01, "loss": 2.0188, "step": 23679 }, { "epoch": 2.4341658957755166, "grad_norm": 0.13270626962184906, "learning_rate": 0.01, "loss": 2.05, "step": 23682 }, { "epoch": 2.4344742522355842, "grad_norm": 0.06691613793373108, "learning_rate": 0.01, "loss": 2.0353, "step": 23685 }, { "epoch": 2.4347826086956523, "grad_norm": 0.04376102611422539, "learning_rate": 0.01, "loss": 2.0213, "step": 23688 }, { "epoch": 2.43509096515572, "grad_norm": 0.04829081892967224, "learning_rate": 0.01, "loss": 2.0337, "step": 23691 }, { "epoch": 2.4353993216157876, "grad_norm": 0.04604188725352287, "learning_rate": 0.01, "loss": 2.0297, "step": 23694 }, { "epoch": 2.4357076780758558, "grad_norm": 0.06296957284212112, "learning_rate": 0.01, "loss": 2.0044, "step": 23697 }, { "epoch": 2.4360160345359234, "grad_norm": 0.07184949517250061, "learning_rate": 0.01, "loss": 2.0204, "step": 23700 }, { "epoch": 2.4363243909959915, "grad_norm": 0.11544491350650787, "learning_rate": 0.01, "loss": 2.0512, "step": 23703 }, { "epoch": 2.436632747456059, "grad_norm": 0.03796609491109848, "learning_rate": 0.01, "loss": 2.0154, "step": 23706 }, { "epoch": 2.4369411039161273, "grad_norm": 0.09068101644515991, "learning_rate": 0.01, "loss": 2.0402, "step": 23709 }, { "epoch": 2.437249460376195, "grad_norm": 0.06220867484807968, "learning_rate": 0.01, "loss": 2.0289, "step": 23712 }, { "epoch": 2.4375578168362626, "grad_norm": 0.040711212903261185, "learning_rate": 0.01, "loss": 2.0365, "step": 23715 }, { "epoch": 2.4378661732963307, "grad_norm": 0.03610675781965256, "learning_rate": 0.01, "loss": 2.023, "step": 23718 }, { "epoch": 2.4381745297563984, "grad_norm": 0.06709878146648407, "learning_rate": 0.01, "loss": 2.0125, "step": 23721 }, { "epoch": 2.438482886216466, "grad_norm": 0.10784180462360382, "learning_rate": 0.01, "loss": 2.0328, "step": 23724 }, { "epoch": 2.438791242676534, "grad_norm": 0.0911094918847084, "learning_rate": 0.01, "loss": 2.0594, "step": 23727 }, { "epoch": 2.439099599136602, "grad_norm": 0.059867773205041885, "learning_rate": 0.01, "loss": 2.0244, "step": 23730 }, { "epoch": 2.43940795559667, "grad_norm": 0.06214331462979317, "learning_rate": 0.01, "loss": 2.0698, "step": 23733 }, { "epoch": 2.4397163120567376, "grad_norm": 0.058527860790491104, "learning_rate": 0.01, "loss": 2.0562, "step": 23736 }, { "epoch": 2.4400246685168057, "grad_norm": 0.09416967630386353, "learning_rate": 0.01, "loss": 2.0385, "step": 23739 }, { "epoch": 2.4403330249768733, "grad_norm": 0.11225190758705139, "learning_rate": 0.01, "loss": 2.0534, "step": 23742 }, { "epoch": 2.440641381436941, "grad_norm": 0.056495241820812225, "learning_rate": 0.01, "loss": 2.038, "step": 23745 }, { "epoch": 2.440949737897009, "grad_norm": 0.05432302877306938, "learning_rate": 0.01, "loss": 2.01, "step": 23748 }, { "epoch": 2.4412580943570767, "grad_norm": 0.04850140959024429, "learning_rate": 0.01, "loss": 2.0303, "step": 23751 }, { "epoch": 2.4415664508171444, "grad_norm": 0.05038965865969658, "learning_rate": 0.01, "loss": 2.0226, "step": 23754 }, { "epoch": 2.4418748072772125, "grad_norm": 0.050226423889398575, "learning_rate": 0.01, "loss": 2.0238, "step": 23757 }, { "epoch": 2.44218316373728, "grad_norm": 0.05979544296860695, "learning_rate": 0.01, "loss": 2.0378, "step": 23760 }, { "epoch": 2.4424915201973483, "grad_norm": 0.04254556819796562, "learning_rate": 0.01, "loss": 2.0207, "step": 23763 }, { "epoch": 2.442799876657416, "grad_norm": 0.1078273206949234, "learning_rate": 0.01, "loss": 2.0342, "step": 23766 }, { "epoch": 2.443108233117484, "grad_norm": 0.0591372586786747, "learning_rate": 0.01, "loss": 2.0186, "step": 23769 }, { "epoch": 2.4434165895775517, "grad_norm": 0.05430880934000015, "learning_rate": 0.01, "loss": 2.0232, "step": 23772 }, { "epoch": 2.4437249460376194, "grad_norm": 0.07246505469083786, "learning_rate": 0.01, "loss": 2.0372, "step": 23775 }, { "epoch": 2.4440333024976875, "grad_norm": 0.04457786679267883, "learning_rate": 0.01, "loss": 2.0157, "step": 23778 }, { "epoch": 2.444341658957755, "grad_norm": 0.03915979340672493, "learning_rate": 0.01, "loss": 2.0173, "step": 23781 }, { "epoch": 2.444650015417823, "grad_norm": 0.08284246176481247, "learning_rate": 0.01, "loss": 2.0433, "step": 23784 }, { "epoch": 2.444958371877891, "grad_norm": 0.08254294097423553, "learning_rate": 0.01, "loss": 2.012, "step": 23787 }, { "epoch": 2.4452667283379586, "grad_norm": 0.12263736873865128, "learning_rate": 0.01, "loss": 2.0491, "step": 23790 }, { "epoch": 2.4455750847980267, "grad_norm": 0.08192913979291916, "learning_rate": 0.01, "loss": 2.047, "step": 23793 }, { "epoch": 2.4458834412580943, "grad_norm": 0.09507100284099579, "learning_rate": 0.01, "loss": 2.0357, "step": 23796 }, { "epoch": 2.4461917977181624, "grad_norm": 0.10554829984903336, "learning_rate": 0.01, "loss": 2.024, "step": 23799 }, { "epoch": 2.44650015417823, "grad_norm": 0.051542408764362335, "learning_rate": 0.01, "loss": 2.0304, "step": 23802 }, { "epoch": 2.4468085106382977, "grad_norm": 0.05467440187931061, "learning_rate": 0.01, "loss": 2.043, "step": 23805 }, { "epoch": 2.447116867098366, "grad_norm": 0.033462487161159515, "learning_rate": 0.01, "loss": 2.0267, "step": 23808 }, { "epoch": 2.4474252235584335, "grad_norm": 0.04688438028097153, "learning_rate": 0.01, "loss": 2.032, "step": 23811 }, { "epoch": 2.447733580018501, "grad_norm": 0.04699881002306938, "learning_rate": 0.01, "loss": 2.0392, "step": 23814 }, { "epoch": 2.4480419364785693, "grad_norm": 0.03932753950357437, "learning_rate": 0.01, "loss": 2.0193, "step": 23817 }, { "epoch": 2.448350292938637, "grad_norm": 0.11096165329217911, "learning_rate": 0.01, "loss": 2.0124, "step": 23820 }, { "epoch": 2.448658649398705, "grad_norm": 0.03774998337030411, "learning_rate": 0.01, "loss": 2.0192, "step": 23823 }, { "epoch": 2.4489670058587727, "grad_norm": 0.03317665681242943, "learning_rate": 0.01, "loss": 2.0218, "step": 23826 }, { "epoch": 2.449275362318841, "grad_norm": 0.06077956408262253, "learning_rate": 0.01, "loss": 2.0397, "step": 23829 }, { "epoch": 2.4495837187789085, "grad_norm": 0.08997214585542679, "learning_rate": 0.01, "loss": 2.0451, "step": 23832 }, { "epoch": 2.449892075238976, "grad_norm": 0.12234194576740265, "learning_rate": 0.01, "loss": 2.0144, "step": 23835 }, { "epoch": 2.4502004316990442, "grad_norm": 0.13833922147750854, "learning_rate": 0.01, "loss": 2.0348, "step": 23838 }, { "epoch": 2.450508788159112, "grad_norm": 0.06707292795181274, "learning_rate": 0.01, "loss": 2.0276, "step": 23841 }, { "epoch": 2.4508171446191795, "grad_norm": 0.04679076373577118, "learning_rate": 0.01, "loss": 2.0624, "step": 23844 }, { "epoch": 2.4511255010792476, "grad_norm": 0.08921289443969727, "learning_rate": 0.01, "loss": 2.0188, "step": 23847 }, { "epoch": 2.4514338575393153, "grad_norm": 0.056680746376514435, "learning_rate": 0.01, "loss": 2.0299, "step": 23850 }, { "epoch": 2.4517422139993834, "grad_norm": 0.07349438965320587, "learning_rate": 0.01, "loss": 2.0249, "step": 23853 }, { "epoch": 2.452050570459451, "grad_norm": 0.057414181530475616, "learning_rate": 0.01, "loss": 2.0054, "step": 23856 }, { "epoch": 2.452358926919519, "grad_norm": 0.09144090861082077, "learning_rate": 0.01, "loss": 2.0264, "step": 23859 }, { "epoch": 2.452667283379587, "grad_norm": 0.05499831214547157, "learning_rate": 0.01, "loss": 2.035, "step": 23862 }, { "epoch": 2.4529756398396545, "grad_norm": 0.03803296014666557, "learning_rate": 0.01, "loss": 1.9935, "step": 23865 }, { "epoch": 2.4532839962997226, "grad_norm": 0.11475666612386703, "learning_rate": 0.01, "loss": 2.0491, "step": 23868 }, { "epoch": 2.4535923527597903, "grad_norm": 0.13063554465770721, "learning_rate": 0.01, "loss": 1.9912, "step": 23871 }, { "epoch": 2.453900709219858, "grad_norm": 0.04982873052358627, "learning_rate": 0.01, "loss": 2.0718, "step": 23874 }, { "epoch": 2.454209065679926, "grad_norm": 0.04090685769915581, "learning_rate": 0.01, "loss": 2.0095, "step": 23877 }, { "epoch": 2.4545174221399937, "grad_norm": 0.044233810156583786, "learning_rate": 0.01, "loss": 2.0235, "step": 23880 }, { "epoch": 2.454825778600062, "grad_norm": 0.045451819896698, "learning_rate": 0.01, "loss": 2.0135, "step": 23883 }, { "epoch": 2.4551341350601295, "grad_norm": 0.04413032531738281, "learning_rate": 0.01, "loss": 2.0371, "step": 23886 }, { "epoch": 2.4554424915201976, "grad_norm": 0.07937499135732651, "learning_rate": 0.01, "loss": 2.0375, "step": 23889 }, { "epoch": 2.455750847980265, "grad_norm": 0.09389673918485641, "learning_rate": 0.01, "loss": 2.0265, "step": 23892 }, { "epoch": 2.456059204440333, "grad_norm": 0.061937954276800156, "learning_rate": 0.01, "loss": 2.0335, "step": 23895 }, { "epoch": 2.456367560900401, "grad_norm": 0.038002993911504745, "learning_rate": 0.01, "loss": 1.9868, "step": 23898 }, { "epoch": 2.4566759173604686, "grad_norm": 0.05679142847657204, "learning_rate": 0.01, "loss": 2.0346, "step": 23901 }, { "epoch": 2.4569842738205363, "grad_norm": 0.07343320548534393, "learning_rate": 0.01, "loss": 2.0105, "step": 23904 }, { "epoch": 2.4572926302806044, "grad_norm": 0.04992273077368736, "learning_rate": 0.01, "loss": 2.0327, "step": 23907 }, { "epoch": 2.457600986740672, "grad_norm": 0.07384973764419556, "learning_rate": 0.01, "loss": 2.0337, "step": 23910 }, { "epoch": 2.45790934320074, "grad_norm": 0.05728161707520485, "learning_rate": 0.01, "loss": 2.0036, "step": 23913 }, { "epoch": 2.458217699660808, "grad_norm": 0.06384929269552231, "learning_rate": 0.01, "loss": 2.0471, "step": 23916 }, { "epoch": 2.458526056120876, "grad_norm": 0.07515307515859604, "learning_rate": 0.01, "loss": 2.0485, "step": 23919 }, { "epoch": 2.4588344125809436, "grad_norm": 0.09443585574626923, "learning_rate": 0.01, "loss": 2.0183, "step": 23922 }, { "epoch": 2.4591427690410113, "grad_norm": 0.059930965304374695, "learning_rate": 0.01, "loss": 1.9955, "step": 23925 }, { "epoch": 2.4594511255010794, "grad_norm": 0.09930091351270676, "learning_rate": 0.01, "loss": 2.0269, "step": 23928 }, { "epoch": 2.459759481961147, "grad_norm": 0.03487955033779144, "learning_rate": 0.01, "loss": 2.003, "step": 23931 }, { "epoch": 2.4600678384212147, "grad_norm": 0.08312834799289703, "learning_rate": 0.01, "loss": 2.0118, "step": 23934 }, { "epoch": 2.460376194881283, "grad_norm": 0.06459874659776688, "learning_rate": 0.01, "loss": 2.0394, "step": 23937 }, { "epoch": 2.4606845513413504, "grad_norm": 0.10331536084413528, "learning_rate": 0.01, "loss": 2.0428, "step": 23940 }, { "epoch": 2.4609929078014185, "grad_norm": 0.06738618016242981, "learning_rate": 0.01, "loss": 2.0409, "step": 23943 }, { "epoch": 2.461301264261486, "grad_norm": 0.05211193859577179, "learning_rate": 0.01, "loss": 2.0296, "step": 23946 }, { "epoch": 2.4616096207215543, "grad_norm": 0.05060914158821106, "learning_rate": 0.01, "loss": 2.0518, "step": 23949 }, { "epoch": 2.461917977181622, "grad_norm": 0.037570733577013016, "learning_rate": 0.01, "loss": 1.9946, "step": 23952 }, { "epoch": 2.4622263336416896, "grad_norm": 0.06046308949589729, "learning_rate": 0.01, "loss": 2.0154, "step": 23955 }, { "epoch": 2.4625346901017577, "grad_norm": 0.08765476942062378, "learning_rate": 0.01, "loss": 2.0433, "step": 23958 }, { "epoch": 2.4628430465618254, "grad_norm": 0.07397017627954483, "learning_rate": 0.01, "loss": 2.0187, "step": 23961 }, { "epoch": 2.463151403021893, "grad_norm": 0.10959914326667786, "learning_rate": 0.01, "loss": 2.0076, "step": 23964 }, { "epoch": 2.463459759481961, "grad_norm": 0.04373926669359207, "learning_rate": 0.01, "loss": 2.0219, "step": 23967 }, { "epoch": 2.463768115942029, "grad_norm": 0.049903422594070435, "learning_rate": 0.01, "loss": 2.0046, "step": 23970 }, { "epoch": 2.464076472402097, "grad_norm": 0.037193864583969116, "learning_rate": 0.01, "loss": 2.012, "step": 23973 }, { "epoch": 2.4643848288621646, "grad_norm": 0.06762266159057617, "learning_rate": 0.01, "loss": 2.0179, "step": 23976 }, { "epoch": 2.4646931853222327, "grad_norm": 0.03918517008423805, "learning_rate": 0.01, "loss": 2.002, "step": 23979 }, { "epoch": 2.4650015417823004, "grad_norm": 0.09819602221250534, "learning_rate": 0.01, "loss": 2.0208, "step": 23982 }, { "epoch": 2.465309898242368, "grad_norm": 0.12719838321208954, "learning_rate": 0.01, "loss": 2.0155, "step": 23985 }, { "epoch": 2.465618254702436, "grad_norm": 0.05890420451760292, "learning_rate": 0.01, "loss": 2.0459, "step": 23988 }, { "epoch": 2.465926611162504, "grad_norm": 0.05101997032761574, "learning_rate": 0.01, "loss": 2.0266, "step": 23991 }, { "epoch": 2.466234967622572, "grad_norm": 0.04597810283303261, "learning_rate": 0.01, "loss": 2.0348, "step": 23994 }, { "epoch": 2.4665433240826395, "grad_norm": 0.0504877045750618, "learning_rate": 0.01, "loss": 2.0267, "step": 23997 }, { "epoch": 2.466851680542707, "grad_norm": 0.07908850163221359, "learning_rate": 0.01, "loss": 2.0215, "step": 24000 }, { "epoch": 2.4671600370027753, "grad_norm": 0.07396575808525085, "learning_rate": 0.01, "loss": 2.0287, "step": 24003 }, { "epoch": 2.467468393462843, "grad_norm": 0.06695767492055893, "learning_rate": 0.01, "loss": 2.0061, "step": 24006 }, { "epoch": 2.467776749922911, "grad_norm": 0.0907633900642395, "learning_rate": 0.01, "loss": 2.0261, "step": 24009 }, { "epoch": 2.4680851063829787, "grad_norm": 0.07585210353136063, "learning_rate": 0.01, "loss": 2.041, "step": 24012 }, { "epoch": 2.4683934628430464, "grad_norm": 0.049894414842128754, "learning_rate": 0.01, "loss": 2.0352, "step": 24015 }, { "epoch": 2.4687018193031145, "grad_norm": 0.038147564977407455, "learning_rate": 0.01, "loss": 2.043, "step": 24018 }, { "epoch": 2.469010175763182, "grad_norm": 0.03651060909032822, "learning_rate": 0.01, "loss": 2.0208, "step": 24021 }, { "epoch": 2.4693185322232503, "grad_norm": 0.09396708011627197, "learning_rate": 0.01, "loss": 2.0419, "step": 24024 }, { "epoch": 2.469626888683318, "grad_norm": 0.03474831208586693, "learning_rate": 0.01, "loss": 2.0217, "step": 24027 }, { "epoch": 2.4699352451433856, "grad_norm": 0.09970984607934952, "learning_rate": 0.01, "loss": 2.0348, "step": 24030 }, { "epoch": 2.4702436016034537, "grad_norm": 0.10633893311023712, "learning_rate": 0.01, "loss": 2.019, "step": 24033 }, { "epoch": 2.4705519580635213, "grad_norm": 0.0787937119603157, "learning_rate": 0.01, "loss": 2.0109, "step": 24036 }, { "epoch": 2.4708603145235895, "grad_norm": 0.05214501917362213, "learning_rate": 0.01, "loss": 2.0023, "step": 24039 }, { "epoch": 2.471168670983657, "grad_norm": 0.02967134490609169, "learning_rate": 0.01, "loss": 2.0043, "step": 24042 }, { "epoch": 2.4714770274437248, "grad_norm": 0.03181852400302887, "learning_rate": 0.01, "loss": 2.0164, "step": 24045 }, { "epoch": 2.471785383903793, "grad_norm": 0.03905104100704193, "learning_rate": 0.01, "loss": 2.0194, "step": 24048 }, { "epoch": 2.4720937403638605, "grad_norm": 0.051532018929719925, "learning_rate": 0.01, "loss": 2.0045, "step": 24051 }, { "epoch": 2.4724020968239286, "grad_norm": 0.13418106734752655, "learning_rate": 0.01, "loss": 2.0486, "step": 24054 }, { "epoch": 2.4727104532839963, "grad_norm": 0.0745147094130516, "learning_rate": 0.01, "loss": 2.0448, "step": 24057 }, { "epoch": 2.473018809744064, "grad_norm": 0.06666639447212219, "learning_rate": 0.01, "loss": 2.0274, "step": 24060 }, { "epoch": 2.473327166204132, "grad_norm": 0.08356054127216339, "learning_rate": 0.01, "loss": 2.028, "step": 24063 }, { "epoch": 2.4736355226641997, "grad_norm": 0.04733874648809433, "learning_rate": 0.01, "loss": 2.0294, "step": 24066 }, { "epoch": 2.473943879124268, "grad_norm": 0.07551899552345276, "learning_rate": 0.01, "loss": 2.0389, "step": 24069 }, { "epoch": 2.4742522355843355, "grad_norm": 0.03114013373851776, "learning_rate": 0.01, "loss": 2.0454, "step": 24072 }, { "epoch": 2.474560592044403, "grad_norm": 0.062112826853990555, "learning_rate": 0.01, "loss": 2.048, "step": 24075 }, { "epoch": 2.4748689485044713, "grad_norm": 0.0542120561003685, "learning_rate": 0.01, "loss": 2.0198, "step": 24078 }, { "epoch": 2.475177304964539, "grad_norm": 0.06153399124741554, "learning_rate": 0.01, "loss": 2.0264, "step": 24081 }, { "epoch": 2.475485661424607, "grad_norm": 0.06306985020637512, "learning_rate": 0.01, "loss": 2.0425, "step": 24084 }, { "epoch": 2.4757940178846747, "grad_norm": 0.03326687961816788, "learning_rate": 0.01, "loss": 2.0211, "step": 24087 }, { "epoch": 2.4761023743447423, "grad_norm": 0.10279777646064758, "learning_rate": 0.01, "loss": 2.0483, "step": 24090 }, { "epoch": 2.4764107308048104, "grad_norm": 0.045618560165166855, "learning_rate": 0.01, "loss": 2.0476, "step": 24093 }, { "epoch": 2.476719087264878, "grad_norm": 0.05065792426466942, "learning_rate": 0.01, "loss": 2.0261, "step": 24096 }, { "epoch": 2.477027443724946, "grad_norm": 0.04114675521850586, "learning_rate": 0.01, "loss": 2.0324, "step": 24099 }, { "epoch": 2.477335800185014, "grad_norm": 0.11044265329837799, "learning_rate": 0.01, "loss": 2.0379, "step": 24102 }, { "epoch": 2.4776441566450815, "grad_norm": 0.053481362760066986, "learning_rate": 0.01, "loss": 2.0408, "step": 24105 }, { "epoch": 2.4779525131051496, "grad_norm": 0.14947016537189484, "learning_rate": 0.01, "loss": 2.0511, "step": 24108 }, { "epoch": 2.4782608695652173, "grad_norm": 0.08009488135576248, "learning_rate": 0.01, "loss": 2.0383, "step": 24111 }, { "epoch": 2.4785692260252854, "grad_norm": 0.06397935748100281, "learning_rate": 0.01, "loss": 2.0445, "step": 24114 }, { "epoch": 2.478877582485353, "grad_norm": 0.0409528985619545, "learning_rate": 0.01, "loss": 2.0386, "step": 24117 }, { "epoch": 2.4791859389454207, "grad_norm": 0.11498477309942245, "learning_rate": 0.01, "loss": 1.9942, "step": 24120 }, { "epoch": 2.479494295405489, "grad_norm": 0.02937469258904457, "learning_rate": 0.01, "loss": 2.0329, "step": 24123 }, { "epoch": 2.4798026518655565, "grad_norm": 0.03659180551767349, "learning_rate": 0.01, "loss": 2.0638, "step": 24126 }, { "epoch": 2.4801110083256246, "grad_norm": 0.10866084694862366, "learning_rate": 0.01, "loss": 2.0319, "step": 24129 }, { "epoch": 2.4804193647856922, "grad_norm": 0.07002250105142593, "learning_rate": 0.01, "loss": 2.03, "step": 24132 }, { "epoch": 2.48072772124576, "grad_norm": 0.07129926234483719, "learning_rate": 0.01, "loss": 2.0407, "step": 24135 }, { "epoch": 2.481036077705828, "grad_norm": 0.061107151210308075, "learning_rate": 0.01, "loss": 2.0288, "step": 24138 }, { "epoch": 2.4813444341658957, "grad_norm": 0.04937992990016937, "learning_rate": 0.01, "loss": 2.0492, "step": 24141 }, { "epoch": 2.481652790625964, "grad_norm": 0.04651058465242386, "learning_rate": 0.01, "loss": 2.0474, "step": 24144 }, { "epoch": 2.4819611470860314, "grad_norm": 0.06658688187599182, "learning_rate": 0.01, "loss": 2.0429, "step": 24147 }, { "epoch": 2.482269503546099, "grad_norm": 0.05241367965936661, "learning_rate": 0.01, "loss": 2.0193, "step": 24150 }, { "epoch": 2.482577860006167, "grad_norm": 0.03564739227294922, "learning_rate": 0.01, "loss": 2.0092, "step": 24153 }, { "epoch": 2.482886216466235, "grad_norm": 0.03984629735350609, "learning_rate": 0.01, "loss": 2.0302, "step": 24156 }, { "epoch": 2.483194572926303, "grad_norm": 0.10245262831449509, "learning_rate": 0.01, "loss": 2.0327, "step": 24159 }, { "epoch": 2.4835029293863706, "grad_norm": 0.0727042481303215, "learning_rate": 0.01, "loss": 2.0295, "step": 24162 }, { "epoch": 2.4838112858464383, "grad_norm": 0.041190728545188904, "learning_rate": 0.01, "loss": 1.9987, "step": 24165 }, { "epoch": 2.4841196423065064, "grad_norm": 0.04986109584569931, "learning_rate": 0.01, "loss": 2.0227, "step": 24168 }, { "epoch": 2.484427998766574, "grad_norm": 0.0689210444688797, "learning_rate": 0.01, "loss": 2.0262, "step": 24171 }, { "epoch": 2.484736355226642, "grad_norm": 0.043997250497341156, "learning_rate": 0.01, "loss": 2.007, "step": 24174 }, { "epoch": 2.48504471168671, "grad_norm": 0.06231982633471489, "learning_rate": 0.01, "loss": 2.0095, "step": 24177 }, { "epoch": 2.485353068146778, "grad_norm": 0.09958053380250931, "learning_rate": 0.01, "loss": 2.022, "step": 24180 }, { "epoch": 2.4856614246068456, "grad_norm": 0.05297970771789551, "learning_rate": 0.01, "loss": 2.0416, "step": 24183 }, { "epoch": 2.4859697810669132, "grad_norm": 0.07420172542333603, "learning_rate": 0.01, "loss": 1.988, "step": 24186 }, { "epoch": 2.4862781375269813, "grad_norm": 0.049017585813999176, "learning_rate": 0.01, "loss": 2.0195, "step": 24189 }, { "epoch": 2.486586493987049, "grad_norm": 0.05398377403616905, "learning_rate": 0.01, "loss": 2.0161, "step": 24192 }, { "epoch": 2.4868948504471167, "grad_norm": 0.03338189795613289, "learning_rate": 0.01, "loss": 2.0073, "step": 24195 }, { "epoch": 2.4872032069071848, "grad_norm": 0.1013825535774231, "learning_rate": 0.01, "loss": 2.0372, "step": 24198 }, { "epoch": 2.4875115633672524, "grad_norm": 0.06101495400071144, "learning_rate": 0.01, "loss": 2.0313, "step": 24201 }, { "epoch": 2.4878199198273205, "grad_norm": 0.06915189325809479, "learning_rate": 0.01, "loss": 1.9753, "step": 24204 }, { "epoch": 2.488128276287388, "grad_norm": 0.09961054474115372, "learning_rate": 0.01, "loss": 2.0183, "step": 24207 }, { "epoch": 2.4884366327474563, "grad_norm": 0.039923045784235, "learning_rate": 0.01, "loss": 2.0256, "step": 24210 }, { "epoch": 2.488744989207524, "grad_norm": 0.07982566952705383, "learning_rate": 0.01, "loss": 2.0225, "step": 24213 }, { "epoch": 2.4890533456675916, "grad_norm": 0.06360599398612976, "learning_rate": 0.01, "loss": 2.0126, "step": 24216 }, { "epoch": 2.4893617021276597, "grad_norm": 0.06489767879247665, "learning_rate": 0.01, "loss": 2.0615, "step": 24219 }, { "epoch": 2.4896700585877274, "grad_norm": 0.08873300999403, "learning_rate": 0.01, "loss": 2.0144, "step": 24222 }, { "epoch": 2.489978415047795, "grad_norm": 0.04309386387467384, "learning_rate": 0.01, "loss": 2.0478, "step": 24225 }, { "epoch": 2.490286771507863, "grad_norm": 0.042991675436496735, "learning_rate": 0.01, "loss": 2.0046, "step": 24228 }, { "epoch": 2.490595127967931, "grad_norm": 0.04507692903280258, "learning_rate": 0.01, "loss": 2.0417, "step": 24231 }, { "epoch": 2.490903484427999, "grad_norm": 0.09288784116506577, "learning_rate": 0.01, "loss": 2.0181, "step": 24234 }, { "epoch": 2.4912118408880666, "grad_norm": 0.040776100009679794, "learning_rate": 0.01, "loss": 2.0524, "step": 24237 }, { "epoch": 2.4915201973481347, "grad_norm": 0.09847230464220047, "learning_rate": 0.01, "loss": 2.0298, "step": 24240 }, { "epoch": 2.4918285538082023, "grad_norm": 0.05252716690301895, "learning_rate": 0.01, "loss": 2.0288, "step": 24243 }, { "epoch": 2.49213691026827, "grad_norm": 0.04028663411736488, "learning_rate": 0.01, "loss": 2.0351, "step": 24246 }, { "epoch": 2.492445266728338, "grad_norm": 0.04721330851316452, "learning_rate": 0.01, "loss": 2.045, "step": 24249 }, { "epoch": 2.4927536231884058, "grad_norm": 0.0453517884016037, "learning_rate": 0.01, "loss": 2.0264, "step": 24252 }, { "epoch": 2.4930619796484734, "grad_norm": 0.09120344370603561, "learning_rate": 0.01, "loss": 2.0189, "step": 24255 }, { "epoch": 2.4933703361085415, "grad_norm": 0.1132507398724556, "learning_rate": 0.01, "loss": 2.0109, "step": 24258 }, { "epoch": 2.493678692568609, "grad_norm": 0.07698217034339905, "learning_rate": 0.01, "loss": 2.0391, "step": 24261 }, { "epoch": 2.4939870490286773, "grad_norm": 0.04104442894458771, "learning_rate": 0.01, "loss": 2.0357, "step": 24264 }, { "epoch": 2.494295405488745, "grad_norm": 0.04681272804737091, "learning_rate": 0.01, "loss": 2.0142, "step": 24267 }, { "epoch": 2.494603761948813, "grad_norm": 0.05137484520673752, "learning_rate": 0.01, "loss": 2.0616, "step": 24270 }, { "epoch": 2.4949121184088807, "grad_norm": 0.05654723569750786, "learning_rate": 0.01, "loss": 2.0371, "step": 24273 }, { "epoch": 2.4952204748689484, "grad_norm": 0.052294518798589706, "learning_rate": 0.01, "loss": 2.0163, "step": 24276 }, { "epoch": 2.4955288313290165, "grad_norm": 0.06906304508447647, "learning_rate": 0.01, "loss": 2.0049, "step": 24279 }, { "epoch": 2.495837187789084, "grad_norm": 0.0664992555975914, "learning_rate": 0.01, "loss": 2.037, "step": 24282 }, { "epoch": 2.496145544249152, "grad_norm": 0.06502712517976761, "learning_rate": 0.01, "loss": 2.0439, "step": 24285 }, { "epoch": 2.49645390070922, "grad_norm": 0.047290291637182236, "learning_rate": 0.01, "loss": 2.0267, "step": 24288 }, { "epoch": 2.4967622571692876, "grad_norm": 0.07271420210599899, "learning_rate": 0.01, "loss": 2.0325, "step": 24291 }, { "epoch": 2.4970706136293557, "grad_norm": 0.042307768017053604, "learning_rate": 0.01, "loss": 2.0223, "step": 24294 }, { "epoch": 2.4973789700894233, "grad_norm": 0.04834264889359474, "learning_rate": 0.01, "loss": 2.0278, "step": 24297 }, { "epoch": 2.4976873265494914, "grad_norm": 0.045435305684804916, "learning_rate": 0.01, "loss": 1.9997, "step": 24300 }, { "epoch": 2.497995683009559, "grad_norm": 0.07511632144451141, "learning_rate": 0.01, "loss": 2.025, "step": 24303 }, { "epoch": 2.4983040394696268, "grad_norm": 0.14621250331401825, "learning_rate": 0.01, "loss": 2.0401, "step": 24306 }, { "epoch": 2.498612395929695, "grad_norm": 0.05969877541065216, "learning_rate": 0.01, "loss": 2.0309, "step": 24309 }, { "epoch": 2.4989207523897625, "grad_norm": 0.040014345198869705, "learning_rate": 0.01, "loss": 2.0301, "step": 24312 }, { "epoch": 2.49922910884983, "grad_norm": 0.05185026675462723, "learning_rate": 0.01, "loss": 2.0149, "step": 24315 }, { "epoch": 2.4995374653098983, "grad_norm": 0.04459863528609276, "learning_rate": 0.01, "loss": 2.03, "step": 24318 }, { "epoch": 2.499845821769966, "grad_norm": 0.06208227947354317, "learning_rate": 0.01, "loss": 2.0253, "step": 24321 }, { "epoch": 2.500154178230034, "grad_norm": 0.0788293108344078, "learning_rate": 0.01, "loss": 2.0154, "step": 24324 }, { "epoch": 2.5004625346901017, "grad_norm": 0.0636831745505333, "learning_rate": 0.01, "loss": 2.0522, "step": 24327 }, { "epoch": 2.50077089115017, "grad_norm": 0.05828903615474701, "learning_rate": 0.01, "loss": 2.0093, "step": 24330 }, { "epoch": 2.5010792476102375, "grad_norm": 0.06897569447755814, "learning_rate": 0.01, "loss": 1.9997, "step": 24333 }, { "epoch": 2.501387604070305, "grad_norm": 0.03793172910809517, "learning_rate": 0.01, "loss": 2.0209, "step": 24336 }, { "epoch": 2.5016959605303732, "grad_norm": 0.05384537950158119, "learning_rate": 0.01, "loss": 2.0355, "step": 24339 }, { "epoch": 2.502004316990441, "grad_norm": 0.07979964464902878, "learning_rate": 0.01, "loss": 2.023, "step": 24342 }, { "epoch": 2.5023126734505086, "grad_norm": 0.054392259567976, "learning_rate": 0.01, "loss": 2.0115, "step": 24345 }, { "epoch": 2.5026210299105767, "grad_norm": 0.06897418200969696, "learning_rate": 0.01, "loss": 2.0147, "step": 24348 }, { "epoch": 2.5029293863706443, "grad_norm": 0.0801873579621315, "learning_rate": 0.01, "loss": 2.0397, "step": 24351 }, { "epoch": 2.5032377428307124, "grad_norm": 0.03708551451563835, "learning_rate": 0.01, "loss": 2.0076, "step": 24354 }, { "epoch": 2.50354609929078, "grad_norm": 0.09656143933534622, "learning_rate": 0.01, "loss": 2.0463, "step": 24357 }, { "epoch": 2.503854455750848, "grad_norm": 0.0770551860332489, "learning_rate": 0.01, "loss": 2.0355, "step": 24360 }, { "epoch": 2.504162812210916, "grad_norm": 0.08103878796100616, "learning_rate": 0.01, "loss": 2.0262, "step": 24363 }, { "epoch": 2.5044711686709835, "grad_norm": 0.10928300768136978, "learning_rate": 0.01, "loss": 2.0389, "step": 24366 }, { "epoch": 2.5047795251310516, "grad_norm": 0.07541976869106293, "learning_rate": 0.01, "loss": 2.0242, "step": 24369 }, { "epoch": 2.5050878815911193, "grad_norm": 0.05937611311674118, "learning_rate": 0.01, "loss": 2.0395, "step": 24372 }, { "epoch": 2.505396238051187, "grad_norm": 0.05396249517798424, "learning_rate": 0.01, "loss": 2.0164, "step": 24375 }, { "epoch": 2.505704594511255, "grad_norm": 0.04004419967532158, "learning_rate": 0.01, "loss": 2.0174, "step": 24378 }, { "epoch": 2.5060129509713227, "grad_norm": 0.05242707580327988, "learning_rate": 0.01, "loss": 2.0119, "step": 24381 }, { "epoch": 2.506321307431391, "grad_norm": 0.038752540946006775, "learning_rate": 0.01, "loss": 2.0181, "step": 24384 }, { "epoch": 2.5066296638914585, "grad_norm": 0.07296596467494965, "learning_rate": 0.01, "loss": 2.0027, "step": 24387 }, { "epoch": 2.5069380203515266, "grad_norm": 0.116209976375103, "learning_rate": 0.01, "loss": 2.0205, "step": 24390 }, { "epoch": 2.5072463768115942, "grad_norm": 0.09165041148662567, "learning_rate": 0.01, "loss": 2.0271, "step": 24393 }, { "epoch": 2.507554733271662, "grad_norm": 0.14264173805713654, "learning_rate": 0.01, "loss": 2.0217, "step": 24396 }, { "epoch": 2.50786308973173, "grad_norm": 0.045917656272649765, "learning_rate": 0.01, "loss": 2.0018, "step": 24399 }, { "epoch": 2.5081714461917977, "grad_norm": 0.057148344814777374, "learning_rate": 0.01, "loss": 2.0295, "step": 24402 }, { "epoch": 2.5084798026518653, "grad_norm": 0.03626836836338043, "learning_rate": 0.01, "loss": 2.0197, "step": 24405 }, { "epoch": 2.5087881591119334, "grad_norm": 0.03621996194124222, "learning_rate": 0.01, "loss": 2.0307, "step": 24408 }, { "epoch": 2.509096515572001, "grad_norm": 0.05835467949509621, "learning_rate": 0.01, "loss": 2.0506, "step": 24411 }, { "epoch": 2.509404872032069, "grad_norm": 0.03973361849784851, "learning_rate": 0.01, "loss": 2.0076, "step": 24414 }, { "epoch": 2.509713228492137, "grad_norm": 0.09463023394346237, "learning_rate": 0.01, "loss": 2.0254, "step": 24417 }, { "epoch": 2.510021584952205, "grad_norm": 0.07552462071180344, "learning_rate": 0.01, "loss": 2.0182, "step": 24420 }, { "epoch": 2.5103299414122726, "grad_norm": 0.1288609802722931, "learning_rate": 0.01, "loss": 2.0337, "step": 24423 }, { "epoch": 2.5106382978723403, "grad_norm": 0.06943691521883011, "learning_rate": 0.01, "loss": 2.0162, "step": 24426 }, { "epoch": 2.5109466543324084, "grad_norm": 0.08581320196390152, "learning_rate": 0.01, "loss": 2.0292, "step": 24429 }, { "epoch": 2.511255010792476, "grad_norm": 0.07379914820194244, "learning_rate": 0.01, "loss": 2.0328, "step": 24432 }, { "epoch": 2.5115633672525437, "grad_norm": 0.09235703945159912, "learning_rate": 0.01, "loss": 2.0194, "step": 24435 }, { "epoch": 2.511871723712612, "grad_norm": 0.05038774758577347, "learning_rate": 0.01, "loss": 2.0249, "step": 24438 }, { "epoch": 2.5121800801726795, "grad_norm": 0.08711019903421402, "learning_rate": 0.01, "loss": 2.0276, "step": 24441 }, { "epoch": 2.5124884366327476, "grad_norm": 0.05432825908064842, "learning_rate": 0.01, "loss": 2.0251, "step": 24444 }, { "epoch": 2.5127967930928152, "grad_norm": 0.10115527361631393, "learning_rate": 0.01, "loss": 2.016, "step": 24447 }, { "epoch": 2.5131051495528833, "grad_norm": 0.10387013852596283, "learning_rate": 0.01, "loss": 2.0198, "step": 24450 }, { "epoch": 2.513413506012951, "grad_norm": 0.0740542933344841, "learning_rate": 0.01, "loss": 2.0179, "step": 24453 }, { "epoch": 2.5137218624730187, "grad_norm": 0.07834311574697495, "learning_rate": 0.01, "loss": 2.0731, "step": 24456 }, { "epoch": 2.5140302189330868, "grad_norm": 0.06743736565113068, "learning_rate": 0.01, "loss": 2.0588, "step": 24459 }, { "epoch": 2.5143385753931544, "grad_norm": 0.051791246980428696, "learning_rate": 0.01, "loss": 2.0063, "step": 24462 }, { "epoch": 2.514646931853222, "grad_norm": 0.0731598362326622, "learning_rate": 0.01, "loss": 2.0271, "step": 24465 }, { "epoch": 2.51495528831329, "grad_norm": 0.04995987191796303, "learning_rate": 0.01, "loss": 2.0224, "step": 24468 }, { "epoch": 2.515263644773358, "grad_norm": 0.04953973367810249, "learning_rate": 0.01, "loss": 2.0176, "step": 24471 }, { "epoch": 2.515572001233426, "grad_norm": 0.05432116240262985, "learning_rate": 0.01, "loss": 2.0349, "step": 24474 }, { "epoch": 2.5158803576934936, "grad_norm": 0.048791225999593735, "learning_rate": 0.01, "loss": 2.0296, "step": 24477 }, { "epoch": 2.5161887141535617, "grad_norm": 0.11742904037237167, "learning_rate": 0.01, "loss": 2.04, "step": 24480 }, { "epoch": 2.5164970706136294, "grad_norm": 0.12617075443267822, "learning_rate": 0.01, "loss": 2.0214, "step": 24483 }, { "epoch": 2.516805427073697, "grad_norm": 0.051573995500802994, "learning_rate": 0.01, "loss": 2.0662, "step": 24486 }, { "epoch": 2.517113783533765, "grad_norm": 0.09131506085395813, "learning_rate": 0.01, "loss": 2.0487, "step": 24489 }, { "epoch": 2.517422139993833, "grad_norm": 0.06593006104230881, "learning_rate": 0.01, "loss": 2.0369, "step": 24492 }, { "epoch": 2.5177304964539005, "grad_norm": 0.038310687988996506, "learning_rate": 0.01, "loss": 1.9954, "step": 24495 }, { "epoch": 2.5180388529139686, "grad_norm": 0.05975675210356712, "learning_rate": 0.01, "loss": 2.028, "step": 24498 }, { "epoch": 2.518347209374036, "grad_norm": 0.04541294649243355, "learning_rate": 0.01, "loss": 2.0285, "step": 24501 }, { "epoch": 2.5186555658341043, "grad_norm": 0.053723473101854324, "learning_rate": 0.01, "loss": 2.0029, "step": 24504 }, { "epoch": 2.518963922294172, "grad_norm": 0.030095964670181274, "learning_rate": 0.01, "loss": 2.0401, "step": 24507 }, { "epoch": 2.51927227875424, "grad_norm": 0.10244923830032349, "learning_rate": 0.01, "loss": 2.0348, "step": 24510 }, { "epoch": 2.5195806352143078, "grad_norm": 0.06249944120645523, "learning_rate": 0.01, "loss": 2.0449, "step": 24513 }, { "epoch": 2.5198889916743754, "grad_norm": 0.08720767498016357, "learning_rate": 0.01, "loss": 2.036, "step": 24516 }, { "epoch": 2.5201973481344435, "grad_norm": 0.07686194777488708, "learning_rate": 0.01, "loss": 2.0151, "step": 24519 }, { "epoch": 2.520505704594511, "grad_norm": 0.08837150782346725, "learning_rate": 0.01, "loss": 2.0414, "step": 24522 }, { "epoch": 2.520814061054579, "grad_norm": 0.0794796347618103, "learning_rate": 0.01, "loss": 2.027, "step": 24525 }, { "epoch": 2.521122417514647, "grad_norm": 0.05655858293175697, "learning_rate": 0.01, "loss": 2.0336, "step": 24528 }, { "epoch": 2.521430773974715, "grad_norm": 0.08295401185750961, "learning_rate": 0.01, "loss": 2.0007, "step": 24531 }, { "epoch": 2.5217391304347827, "grad_norm": 0.03982521593570709, "learning_rate": 0.01, "loss": 2.0288, "step": 24534 }, { "epoch": 2.5220474868948504, "grad_norm": 0.04791923984885216, "learning_rate": 0.01, "loss": 2.038, "step": 24537 }, { "epoch": 2.5223558433549185, "grad_norm": 0.11436691880226135, "learning_rate": 0.01, "loss": 2.046, "step": 24540 }, { "epoch": 2.522664199814986, "grad_norm": 0.1064198762178421, "learning_rate": 0.01, "loss": 2.0144, "step": 24543 }, { "epoch": 2.522972556275054, "grad_norm": 0.08036024123430252, "learning_rate": 0.01, "loss": 2.0152, "step": 24546 }, { "epoch": 2.523280912735122, "grad_norm": 0.061799556016922, "learning_rate": 0.01, "loss": 2.0165, "step": 24549 }, { "epoch": 2.5235892691951896, "grad_norm": 0.04592469707131386, "learning_rate": 0.01, "loss": 2.0061, "step": 24552 }, { "epoch": 2.523897625655257, "grad_norm": 0.036766473203897476, "learning_rate": 0.01, "loss": 1.9913, "step": 24555 }, { "epoch": 2.5242059821153253, "grad_norm": 0.09619138389825821, "learning_rate": 0.01, "loss": 2.0264, "step": 24558 }, { "epoch": 2.5245143385753934, "grad_norm": 0.03915918245911598, "learning_rate": 0.01, "loss": 2.0098, "step": 24561 }, { "epoch": 2.524822695035461, "grad_norm": 0.04883084446191788, "learning_rate": 0.01, "loss": 2.0298, "step": 24564 }, { "epoch": 2.5251310514955287, "grad_norm": 0.05630512908101082, "learning_rate": 0.01, "loss": 2.0117, "step": 24567 }, { "epoch": 2.525439407955597, "grad_norm": 0.04064425081014633, "learning_rate": 0.01, "loss": 2.0434, "step": 24570 }, { "epoch": 2.5257477644156645, "grad_norm": 0.05302917957305908, "learning_rate": 0.01, "loss": 2.044, "step": 24573 }, { "epoch": 2.526056120875732, "grad_norm": 0.07677201181650162, "learning_rate": 0.01, "loss": 2.0294, "step": 24576 }, { "epoch": 2.5263644773358003, "grad_norm": 0.07950242608785629, "learning_rate": 0.01, "loss": 2.0006, "step": 24579 }, { "epoch": 2.526672833795868, "grad_norm": 0.07068518549203873, "learning_rate": 0.01, "loss": 2.0278, "step": 24582 }, { "epoch": 2.5269811902559356, "grad_norm": 0.08623625338077545, "learning_rate": 0.01, "loss": 2.0369, "step": 24585 }, { "epoch": 2.5272895467160037, "grad_norm": 0.05549190193414688, "learning_rate": 0.01, "loss": 2.0382, "step": 24588 }, { "epoch": 2.527597903176072, "grad_norm": 0.05710297450423241, "learning_rate": 0.01, "loss": 2.0209, "step": 24591 }, { "epoch": 2.5279062596361395, "grad_norm": 0.05071646347641945, "learning_rate": 0.01, "loss": 2.0066, "step": 24594 }, { "epoch": 2.528214616096207, "grad_norm": 0.09765972197055817, "learning_rate": 0.01, "loss": 2.0468, "step": 24597 }, { "epoch": 2.5285229725562752, "grad_norm": 0.05874921754002571, "learning_rate": 0.01, "loss": 2.0306, "step": 24600 }, { "epoch": 2.528831329016343, "grad_norm": 0.10598991811275482, "learning_rate": 0.01, "loss": 2.0156, "step": 24603 }, { "epoch": 2.5291396854764105, "grad_norm": 0.07071609050035477, "learning_rate": 0.01, "loss": 2.0284, "step": 24606 }, { "epoch": 2.5294480419364787, "grad_norm": 0.07132923603057861, "learning_rate": 0.01, "loss": 2.0004, "step": 24609 }, { "epoch": 2.5297563983965463, "grad_norm": 0.06741276383399963, "learning_rate": 0.01, "loss": 2.0247, "step": 24612 }, { "epoch": 2.530064754856614, "grad_norm": 0.10399371385574341, "learning_rate": 0.01, "loss": 2.033, "step": 24615 }, { "epoch": 2.530373111316682, "grad_norm": 0.054513610899448395, "learning_rate": 0.01, "loss": 2.0364, "step": 24618 }, { "epoch": 2.53068146777675, "grad_norm": 0.03990021347999573, "learning_rate": 0.01, "loss": 2.019, "step": 24621 }, { "epoch": 2.530989824236818, "grad_norm": 0.0329439677298069, "learning_rate": 0.01, "loss": 2.0653, "step": 24624 }, { "epoch": 2.5312981806968855, "grad_norm": 0.08065532892942429, "learning_rate": 0.01, "loss": 2.0195, "step": 24627 }, { "epoch": 2.5316065371569536, "grad_norm": 0.04455409198999405, "learning_rate": 0.01, "loss": 2.0383, "step": 24630 }, { "epoch": 2.5319148936170213, "grad_norm": 0.09395566582679749, "learning_rate": 0.01, "loss": 2.026, "step": 24633 }, { "epoch": 2.532223250077089, "grad_norm": 0.04042106121778488, "learning_rate": 0.01, "loss": 2.0272, "step": 24636 }, { "epoch": 2.532531606537157, "grad_norm": 0.09208521991968155, "learning_rate": 0.01, "loss": 2.0265, "step": 24639 }, { "epoch": 2.5328399629972247, "grad_norm": 0.06603435426950455, "learning_rate": 0.01, "loss": 2.044, "step": 24642 }, { "epoch": 2.533148319457293, "grad_norm": 0.039963483810424805, "learning_rate": 0.01, "loss": 2.0491, "step": 24645 }, { "epoch": 2.5334566759173605, "grad_norm": 0.14821624755859375, "learning_rate": 0.01, "loss": 2.0013, "step": 24648 }, { "epoch": 2.5337650323774286, "grad_norm": 0.06644035130739212, "learning_rate": 0.01, "loss": 2.0433, "step": 24651 }, { "epoch": 2.534073388837496, "grad_norm": 0.0375928059220314, "learning_rate": 0.01, "loss": 2.0264, "step": 24654 }, { "epoch": 2.534381745297564, "grad_norm": 0.06041393801569939, "learning_rate": 0.01, "loss": 2.0198, "step": 24657 }, { "epoch": 2.534690101757632, "grad_norm": 0.06117352098226547, "learning_rate": 0.01, "loss": 2.0148, "step": 24660 }, { "epoch": 2.5349984582176996, "grad_norm": 0.05386986956000328, "learning_rate": 0.01, "loss": 2.0521, "step": 24663 }, { "epoch": 2.5353068146777673, "grad_norm": 0.03399750217795372, "learning_rate": 0.01, "loss": 2.0223, "step": 24666 }, { "epoch": 2.5356151711378354, "grad_norm": 0.06256785988807678, "learning_rate": 0.01, "loss": 2.0372, "step": 24669 }, { "epoch": 2.535923527597903, "grad_norm": 0.08575739711523056, "learning_rate": 0.01, "loss": 2.0214, "step": 24672 }, { "epoch": 2.536231884057971, "grad_norm": 0.0895959809422493, "learning_rate": 0.01, "loss": 2.0339, "step": 24675 }, { "epoch": 2.536540240518039, "grad_norm": 0.06579075753688812, "learning_rate": 0.01, "loss": 2.0363, "step": 24678 }, { "epoch": 2.536848596978107, "grad_norm": 0.04509506747126579, "learning_rate": 0.01, "loss": 2.0331, "step": 24681 }, { "epoch": 2.5371569534381746, "grad_norm": 0.03535350412130356, "learning_rate": 0.01, "loss": 2.0262, "step": 24684 }, { "epoch": 2.5374653098982423, "grad_norm": 0.03496406227350235, "learning_rate": 0.01, "loss": 2.0183, "step": 24687 }, { "epoch": 2.5377736663583104, "grad_norm": 0.04595872759819031, "learning_rate": 0.01, "loss": 2.0376, "step": 24690 }, { "epoch": 2.538082022818378, "grad_norm": 0.07009676098823547, "learning_rate": 0.01, "loss": 2.0236, "step": 24693 }, { "epoch": 2.5383903792784457, "grad_norm": 0.07328460365533829, "learning_rate": 0.01, "loss": 2.0163, "step": 24696 }, { "epoch": 2.538698735738514, "grad_norm": 0.09521552175283432, "learning_rate": 0.01, "loss": 2.0288, "step": 24699 }, { "epoch": 2.5390070921985815, "grad_norm": 0.09087500721216202, "learning_rate": 0.01, "loss": 2.0524, "step": 24702 }, { "epoch": 2.5393154486586496, "grad_norm": 0.05657880753278732, "learning_rate": 0.01, "loss": 2.0336, "step": 24705 }, { "epoch": 2.539623805118717, "grad_norm": 0.13524407148361206, "learning_rate": 0.01, "loss": 2.0245, "step": 24708 }, { "epoch": 2.5399321615787853, "grad_norm": 0.04498621076345444, "learning_rate": 0.01, "loss": 2.0101, "step": 24711 }, { "epoch": 2.540240518038853, "grad_norm": 0.04117140173912048, "learning_rate": 0.01, "loss": 2.0149, "step": 24714 }, { "epoch": 2.5405488744989206, "grad_norm": 0.03630746528506279, "learning_rate": 0.01, "loss": 2.0323, "step": 24717 }, { "epoch": 2.5408572309589887, "grad_norm": 0.03791969269514084, "learning_rate": 0.01, "loss": 2.0142, "step": 24720 }, { "epoch": 2.5411655874190564, "grad_norm": 0.045213595032691956, "learning_rate": 0.01, "loss": 2.0136, "step": 24723 }, { "epoch": 2.541473943879124, "grad_norm": 0.08232447504997253, "learning_rate": 0.01, "loss": 2.0101, "step": 24726 }, { "epoch": 2.541782300339192, "grad_norm": 0.0790674090385437, "learning_rate": 0.01, "loss": 2.0282, "step": 24729 }, { "epoch": 2.54209065679926, "grad_norm": 0.09643759578466415, "learning_rate": 0.01, "loss": 2.008, "step": 24732 }, { "epoch": 2.542399013259328, "grad_norm": 0.09790430217981339, "learning_rate": 0.01, "loss": 1.9987, "step": 24735 }, { "epoch": 2.5427073697193956, "grad_norm": 0.04904096946120262, "learning_rate": 0.01, "loss": 2.0366, "step": 24738 }, { "epoch": 2.5430157261794637, "grad_norm": 0.042802706360816956, "learning_rate": 0.01, "loss": 2.0548, "step": 24741 }, { "epoch": 2.5433240826395314, "grad_norm": 0.04947663098573685, "learning_rate": 0.01, "loss": 2.03, "step": 24744 }, { "epoch": 2.543632439099599, "grad_norm": 0.040841687470674515, "learning_rate": 0.01, "loss": 2.0246, "step": 24747 }, { "epoch": 2.543940795559667, "grad_norm": 0.051419809460639954, "learning_rate": 0.01, "loss": 2.0352, "step": 24750 }, { "epoch": 2.544249152019735, "grad_norm": 0.07173865288496017, "learning_rate": 0.01, "loss": 2.0396, "step": 24753 }, { "epoch": 2.5445575084798024, "grad_norm": 0.07664339989423752, "learning_rate": 0.01, "loss": 2.0209, "step": 24756 }, { "epoch": 2.5448658649398705, "grad_norm": 0.05180468037724495, "learning_rate": 0.01, "loss": 2.0375, "step": 24759 }, { "epoch": 2.545174221399938, "grad_norm": 0.03839515894651413, "learning_rate": 0.01, "loss": 2.0408, "step": 24762 }, { "epoch": 2.5454825778600063, "grad_norm": 0.08712394535541534, "learning_rate": 0.01, "loss": 2.0303, "step": 24765 }, { "epoch": 2.545790934320074, "grad_norm": 0.06906873732805252, "learning_rate": 0.01, "loss": 2.0103, "step": 24768 }, { "epoch": 2.546099290780142, "grad_norm": 0.04779994115233421, "learning_rate": 0.01, "loss": 2.0007, "step": 24771 }, { "epoch": 2.5464076472402097, "grad_norm": 0.03945513069629669, "learning_rate": 0.01, "loss": 2.0207, "step": 24774 }, { "epoch": 2.5467160037002774, "grad_norm": 0.04089882969856262, "learning_rate": 0.01, "loss": 2.0475, "step": 24777 }, { "epoch": 2.5470243601603455, "grad_norm": 0.04492718353867531, "learning_rate": 0.01, "loss": 2.0333, "step": 24780 }, { "epoch": 2.547332716620413, "grad_norm": 0.0761101022362709, "learning_rate": 0.01, "loss": 2.024, "step": 24783 }, { "epoch": 2.547641073080481, "grad_norm": 0.09586388617753983, "learning_rate": 0.01, "loss": 2.0455, "step": 24786 }, { "epoch": 2.547949429540549, "grad_norm": 0.0410308912396431, "learning_rate": 0.01, "loss": 2.0067, "step": 24789 }, { "epoch": 2.5482577860006166, "grad_norm": 0.0583110935986042, "learning_rate": 0.01, "loss": 2.0433, "step": 24792 }, { "epoch": 2.5485661424606847, "grad_norm": 0.03310194984078407, "learning_rate": 0.01, "loss": 2.0022, "step": 24795 }, { "epoch": 2.5488744989207524, "grad_norm": 0.0849560797214508, "learning_rate": 0.01, "loss": 2.0458, "step": 24798 }, { "epoch": 2.5491828553808205, "grad_norm": 0.052898190915584564, "learning_rate": 0.01, "loss": 2.0099, "step": 24801 }, { "epoch": 2.549491211840888, "grad_norm": 0.09630381315946579, "learning_rate": 0.01, "loss": 2.0312, "step": 24804 }, { "epoch": 2.5497995683009558, "grad_norm": 0.04892333596944809, "learning_rate": 0.01, "loss": 2.0151, "step": 24807 }, { "epoch": 2.550107924761024, "grad_norm": 0.09465577453374863, "learning_rate": 0.01, "loss": 2.0163, "step": 24810 }, { "epoch": 2.5504162812210915, "grad_norm": 0.0832308977842331, "learning_rate": 0.01, "loss": 2.0446, "step": 24813 }, { "epoch": 2.550724637681159, "grad_norm": 0.11276236176490784, "learning_rate": 0.01, "loss": 2.0243, "step": 24816 }, { "epoch": 2.5510329941412273, "grad_norm": 0.08327414095401764, "learning_rate": 0.01, "loss": 2.0607, "step": 24819 }, { "epoch": 2.551341350601295, "grad_norm": 0.05502014979720116, "learning_rate": 0.01, "loss": 2.0281, "step": 24822 }, { "epoch": 2.551649707061363, "grad_norm": 0.03681863471865654, "learning_rate": 0.01, "loss": 2.0101, "step": 24825 }, { "epoch": 2.5519580635214307, "grad_norm": 0.08096860349178314, "learning_rate": 0.01, "loss": 1.9904, "step": 24828 }, { "epoch": 2.552266419981499, "grad_norm": 0.05901675671339035, "learning_rate": 0.01, "loss": 2.046, "step": 24831 }, { "epoch": 2.5525747764415665, "grad_norm": 0.09850065410137177, "learning_rate": 0.01, "loss": 2.0495, "step": 24834 }, { "epoch": 2.552883132901634, "grad_norm": 0.08438712358474731, "learning_rate": 0.01, "loss": 2.0185, "step": 24837 }, { "epoch": 2.5531914893617023, "grad_norm": 0.04949135333299637, "learning_rate": 0.01, "loss": 2.0197, "step": 24840 }, { "epoch": 2.55349984582177, "grad_norm": 0.044099997729063034, "learning_rate": 0.01, "loss": 2.0275, "step": 24843 }, { "epoch": 2.5538082022818376, "grad_norm": 0.08626649528741837, "learning_rate": 0.01, "loss": 2.0246, "step": 24846 }, { "epoch": 2.5541165587419057, "grad_norm": 0.08545881509780884, "learning_rate": 0.01, "loss": 2.0221, "step": 24849 }, { "epoch": 2.5544249152019733, "grad_norm": 0.06181343272328377, "learning_rate": 0.01, "loss": 2.0379, "step": 24852 }, { "epoch": 2.5547332716620414, "grad_norm": 0.0839785784482956, "learning_rate": 0.01, "loss": 2.0309, "step": 24855 }, { "epoch": 2.555041628122109, "grad_norm": 0.055504992604255676, "learning_rate": 0.01, "loss": 2.0333, "step": 24858 }, { "epoch": 2.555349984582177, "grad_norm": 0.04236135631799698, "learning_rate": 0.01, "loss": 1.9981, "step": 24861 }, { "epoch": 2.555658341042245, "grad_norm": 0.035614918917417526, "learning_rate": 0.01, "loss": 2.006, "step": 24864 }, { "epoch": 2.5559666975023125, "grad_norm": 0.04150492325425148, "learning_rate": 0.01, "loss": 2.0414, "step": 24867 }, { "epoch": 2.5562750539623806, "grad_norm": 0.07994359731674194, "learning_rate": 0.01, "loss": 2.0302, "step": 24870 }, { "epoch": 2.5565834104224483, "grad_norm": 0.08954035490751266, "learning_rate": 0.01, "loss": 2.0208, "step": 24873 }, { "epoch": 2.556891766882516, "grad_norm": 0.1362268626689911, "learning_rate": 0.01, "loss": 2.0211, "step": 24876 }, { "epoch": 2.557200123342584, "grad_norm": 0.11425944417715073, "learning_rate": 0.01, "loss": 2.0126, "step": 24879 }, { "epoch": 2.5575084798026517, "grad_norm": 0.07083035260438919, "learning_rate": 0.01, "loss": 2.0312, "step": 24882 }, { "epoch": 2.55781683626272, "grad_norm": 0.06250528246164322, "learning_rate": 0.01, "loss": 2.0317, "step": 24885 }, { "epoch": 2.5581251927227875, "grad_norm": 0.047506481409072876, "learning_rate": 0.01, "loss": 2.0134, "step": 24888 }, { "epoch": 2.5584335491828556, "grad_norm": 0.04237549751996994, "learning_rate": 0.01, "loss": 2.0227, "step": 24891 }, { "epoch": 2.5587419056429233, "grad_norm": 0.04128411412239075, "learning_rate": 0.01, "loss": 2.0122, "step": 24894 }, { "epoch": 2.559050262102991, "grad_norm": 0.03886473551392555, "learning_rate": 0.01, "loss": 2.0385, "step": 24897 }, { "epoch": 2.559358618563059, "grad_norm": 0.1163051575422287, "learning_rate": 0.01, "loss": 2.0456, "step": 24900 }, { "epoch": 2.5596669750231267, "grad_norm": 0.04279797896742821, "learning_rate": 0.01, "loss": 2.0122, "step": 24903 }, { "epoch": 2.5599753314831943, "grad_norm": 0.08159471303224564, "learning_rate": 0.01, "loss": 2.0218, "step": 24906 }, { "epoch": 2.5602836879432624, "grad_norm": 0.06161525472998619, "learning_rate": 0.01, "loss": 2.009, "step": 24909 }, { "epoch": 2.56059204440333, "grad_norm": 0.05011424049735069, "learning_rate": 0.01, "loss": 2.0001, "step": 24912 }, { "epoch": 2.560900400863398, "grad_norm": 0.05973159521818161, "learning_rate": 0.01, "loss": 2.0289, "step": 24915 }, { "epoch": 2.561208757323466, "grad_norm": 0.07461394369602203, "learning_rate": 0.01, "loss": 2.0357, "step": 24918 }, { "epoch": 2.561517113783534, "grad_norm": 0.09631699323654175, "learning_rate": 0.01, "loss": 2.0234, "step": 24921 }, { "epoch": 2.5618254702436016, "grad_norm": 0.05727219581604004, "learning_rate": 0.01, "loss": 2.0404, "step": 24924 }, { "epoch": 2.5621338267036693, "grad_norm": 0.08594338595867157, "learning_rate": 0.01, "loss": 2.0166, "step": 24927 }, { "epoch": 2.5624421831637374, "grad_norm": 0.1109083890914917, "learning_rate": 0.01, "loss": 2.0185, "step": 24930 }, { "epoch": 2.562750539623805, "grad_norm": 0.0593339204788208, "learning_rate": 0.01, "loss": 2.0064, "step": 24933 }, { "epoch": 2.5630588960838727, "grad_norm": 0.0381302647292614, "learning_rate": 0.01, "loss": 2.0236, "step": 24936 }, { "epoch": 2.563367252543941, "grad_norm": 0.056093595921993256, "learning_rate": 0.01, "loss": 2.0269, "step": 24939 }, { "epoch": 2.5636756090040085, "grad_norm": 0.11212731897830963, "learning_rate": 0.01, "loss": 2.0488, "step": 24942 }, { "epoch": 2.5639839654640766, "grad_norm": 0.07110024988651276, "learning_rate": 0.01, "loss": 2.013, "step": 24945 }, { "epoch": 2.5642923219241442, "grad_norm": 0.05951390787959099, "learning_rate": 0.01, "loss": 2.0412, "step": 24948 }, { "epoch": 2.5646006783842124, "grad_norm": 0.07836031913757324, "learning_rate": 0.01, "loss": 2.0143, "step": 24951 }, { "epoch": 2.56490903484428, "grad_norm": 0.06882999837398529, "learning_rate": 0.01, "loss": 2.0065, "step": 24954 }, { "epoch": 2.5652173913043477, "grad_norm": 0.0868605375289917, "learning_rate": 0.01, "loss": 2.0111, "step": 24957 }, { "epoch": 2.5655257477644158, "grad_norm": 0.10812171548604965, "learning_rate": 0.01, "loss": 2.0462, "step": 24960 }, { "epoch": 2.5658341042244834, "grad_norm": 0.07124783843755722, "learning_rate": 0.01, "loss": 2.013, "step": 24963 }, { "epoch": 2.566142460684551, "grad_norm": 0.037611838430166245, "learning_rate": 0.01, "loss": 2.0277, "step": 24966 }, { "epoch": 2.566450817144619, "grad_norm": 0.03723758086562157, "learning_rate": 0.01, "loss": 2.0187, "step": 24969 }, { "epoch": 2.566759173604687, "grad_norm": 0.08805309981107712, "learning_rate": 0.01, "loss": 2.01, "step": 24972 }, { "epoch": 2.567067530064755, "grad_norm": 0.10381683707237244, "learning_rate": 0.01, "loss": 2.0253, "step": 24975 }, { "epoch": 2.5673758865248226, "grad_norm": 0.11186369508504868, "learning_rate": 0.01, "loss": 2.0271, "step": 24978 }, { "epoch": 2.5676842429848907, "grad_norm": 0.07986850291490555, "learning_rate": 0.01, "loss": 2.0252, "step": 24981 }, { "epoch": 2.5679925994449584, "grad_norm": 0.04094192385673523, "learning_rate": 0.01, "loss": 1.9982, "step": 24984 }, { "epoch": 2.568300955905026, "grad_norm": 0.049611032009124756, "learning_rate": 0.01, "loss": 2.0283, "step": 24987 }, { "epoch": 2.568609312365094, "grad_norm": 0.05613689869642258, "learning_rate": 0.01, "loss": 2.0304, "step": 24990 }, { "epoch": 2.568917668825162, "grad_norm": 0.051894500851631165, "learning_rate": 0.01, "loss": 2.0069, "step": 24993 }, { "epoch": 2.5692260252852295, "grad_norm": 0.04092536121606827, "learning_rate": 0.01, "loss": 2.0283, "step": 24996 }, { "epoch": 2.5695343817452976, "grad_norm": 0.03474249318242073, "learning_rate": 0.01, "loss": 2.0083, "step": 24999 }, { "epoch": 2.5698427382053652, "grad_norm": 0.04513520747423172, "learning_rate": 0.01, "loss": 2.0288, "step": 25002 }, { "epoch": 2.5701510946654333, "grad_norm": 0.06130135431885719, "learning_rate": 0.01, "loss": 2.0245, "step": 25005 }, { "epoch": 2.570459451125501, "grad_norm": 0.07398026436567307, "learning_rate": 0.01, "loss": 2.0199, "step": 25008 }, { "epoch": 2.570767807585569, "grad_norm": 0.06060103699564934, "learning_rate": 0.01, "loss": 1.9956, "step": 25011 }, { "epoch": 2.5710761640456368, "grad_norm": 0.051868923008441925, "learning_rate": 0.01, "loss": 2.0355, "step": 25014 }, { "epoch": 2.5713845205057044, "grad_norm": 0.09465671330690384, "learning_rate": 0.01, "loss": 2.014, "step": 25017 }, { "epoch": 2.5716928769657725, "grad_norm": 0.048888836055994034, "learning_rate": 0.01, "loss": 2.0163, "step": 25020 }, { "epoch": 2.57200123342584, "grad_norm": 0.04938677325844765, "learning_rate": 0.01, "loss": 2.0165, "step": 25023 }, { "epoch": 2.572309589885908, "grad_norm": 0.1066848635673523, "learning_rate": 0.01, "loss": 2.0354, "step": 25026 }, { "epoch": 2.572617946345976, "grad_norm": 0.044199470430612564, "learning_rate": 0.01, "loss": 2.0358, "step": 25029 }, { "epoch": 2.572926302806044, "grad_norm": 0.06313291937112808, "learning_rate": 0.01, "loss": 2.0166, "step": 25032 }, { "epoch": 2.5732346592661117, "grad_norm": 0.08843620121479034, "learning_rate": 0.01, "loss": 2.0125, "step": 25035 }, { "epoch": 2.5735430157261794, "grad_norm": 0.028659775853157043, "learning_rate": 0.01, "loss": 2.0324, "step": 25038 }, { "epoch": 2.5738513721862475, "grad_norm": 0.09034299850463867, "learning_rate": 0.01, "loss": 2.0311, "step": 25041 }, { "epoch": 2.574159728646315, "grad_norm": 0.08496701717376709, "learning_rate": 0.01, "loss": 2.0457, "step": 25044 }, { "epoch": 2.574468085106383, "grad_norm": 0.04633186012506485, "learning_rate": 0.01, "loss": 2.0204, "step": 25047 }, { "epoch": 2.574776441566451, "grad_norm": 0.05091328173875809, "learning_rate": 0.01, "loss": 2.0622, "step": 25050 }, { "epoch": 2.5750847980265186, "grad_norm": 0.03941154107451439, "learning_rate": 0.01, "loss": 2.0097, "step": 25053 }, { "epoch": 2.5753931544865862, "grad_norm": 0.07574623823165894, "learning_rate": 0.01, "loss": 2.0119, "step": 25056 }, { "epoch": 2.5757015109466543, "grad_norm": 0.07106275856494904, "learning_rate": 0.01, "loss": 2.0164, "step": 25059 }, { "epoch": 2.5760098674067224, "grad_norm": 0.06767601519823074, "learning_rate": 0.01, "loss": 2.0243, "step": 25062 }, { "epoch": 2.57631822386679, "grad_norm": 0.05537039414048195, "learning_rate": 0.01, "loss": 2.0375, "step": 25065 }, { "epoch": 2.5766265803268578, "grad_norm": 0.06547438353300095, "learning_rate": 0.01, "loss": 2.0014, "step": 25068 }, { "epoch": 2.576934936786926, "grad_norm": 0.0862760990858078, "learning_rate": 0.01, "loss": 2.004, "step": 25071 }, { "epoch": 2.5772432932469935, "grad_norm": 0.041683733463287354, "learning_rate": 0.01, "loss": 2.0484, "step": 25074 }, { "epoch": 2.577551649707061, "grad_norm": 0.049321915954351425, "learning_rate": 0.01, "loss": 2.0158, "step": 25077 }, { "epoch": 2.5778600061671293, "grad_norm": 0.09261754900217056, "learning_rate": 0.01, "loss": 2.0162, "step": 25080 }, { "epoch": 2.578168362627197, "grad_norm": 0.07979609072208405, "learning_rate": 0.01, "loss": 2.0425, "step": 25083 }, { "epoch": 2.5784767190872646, "grad_norm": 0.06629879772663116, "learning_rate": 0.01, "loss": 2.0339, "step": 25086 }, { "epoch": 2.5787850755473327, "grad_norm": 0.07896976172924042, "learning_rate": 0.01, "loss": 2.0144, "step": 25089 }, { "epoch": 2.579093432007401, "grad_norm": 0.06102503091096878, "learning_rate": 0.01, "loss": 2.0012, "step": 25092 }, { "epoch": 2.5794017884674685, "grad_norm": 0.07823985069990158, "learning_rate": 0.01, "loss": 2.0207, "step": 25095 }, { "epoch": 2.579710144927536, "grad_norm": 0.08163253217935562, "learning_rate": 0.01, "loss": 2.0583, "step": 25098 }, { "epoch": 2.5800185013876042, "grad_norm": 0.06111651286482811, "learning_rate": 0.01, "loss": 2.0434, "step": 25101 }, { "epoch": 2.580326857847672, "grad_norm": 0.03768099471926689, "learning_rate": 0.01, "loss": 2.0185, "step": 25104 }, { "epoch": 2.5806352143077396, "grad_norm": 0.0871853157877922, "learning_rate": 0.01, "loss": 2.0293, "step": 25107 }, { "epoch": 2.5809435707678077, "grad_norm": 0.05394020304083824, "learning_rate": 0.01, "loss": 2.0249, "step": 25110 }, { "epoch": 2.5812519272278753, "grad_norm": 0.07910983264446259, "learning_rate": 0.01, "loss": 2.042, "step": 25113 }, { "epoch": 2.581560283687943, "grad_norm": 0.06922271102666855, "learning_rate": 0.01, "loss": 2.0493, "step": 25116 }, { "epoch": 2.581868640148011, "grad_norm": 0.05517781525850296, "learning_rate": 0.01, "loss": 2.0161, "step": 25119 }, { "epoch": 2.582176996608079, "grad_norm": 0.05166595056653023, "learning_rate": 0.01, "loss": 2.0402, "step": 25122 }, { "epoch": 2.582485353068147, "grad_norm": 0.045153357088565826, "learning_rate": 0.01, "loss": 2.0366, "step": 25125 }, { "epoch": 2.5827937095282145, "grad_norm": 0.07232387363910675, "learning_rate": 0.01, "loss": 2.0345, "step": 25128 }, { "epoch": 2.5831020659882826, "grad_norm": 0.035037536174058914, "learning_rate": 0.01, "loss": 2.0195, "step": 25131 }, { "epoch": 2.5834104224483503, "grad_norm": 0.039313822984695435, "learning_rate": 0.01, "loss": 2.0196, "step": 25134 }, { "epoch": 2.583718778908418, "grad_norm": 0.0632469579577446, "learning_rate": 0.01, "loss": 2.0454, "step": 25137 }, { "epoch": 2.584027135368486, "grad_norm": 0.10993051528930664, "learning_rate": 0.01, "loss": 2.0289, "step": 25140 }, { "epoch": 2.5843354918285537, "grad_norm": 0.0852990597486496, "learning_rate": 0.01, "loss": 2.053, "step": 25143 }, { "epoch": 2.5846438482886214, "grad_norm": 0.0442403107881546, "learning_rate": 0.01, "loss": 2.0089, "step": 25146 }, { "epoch": 2.5849522047486895, "grad_norm": 0.03534874692559242, "learning_rate": 0.01, "loss": 2.0297, "step": 25149 }, { "epoch": 2.5852605612087576, "grad_norm": 0.031708016991615295, "learning_rate": 0.01, "loss": 2.0064, "step": 25152 }, { "epoch": 2.5855689176688252, "grad_norm": 0.056695304811000824, "learning_rate": 0.01, "loss": 2.0265, "step": 25155 }, { "epoch": 2.585877274128893, "grad_norm": 0.12697716057300568, "learning_rate": 0.01, "loss": 2.0415, "step": 25158 }, { "epoch": 2.586185630588961, "grad_norm": 0.07686912268400192, "learning_rate": 0.01, "loss": 2.0098, "step": 25161 }, { "epoch": 2.5864939870490287, "grad_norm": 0.10015466809272766, "learning_rate": 0.01, "loss": 2.0229, "step": 25164 }, { "epoch": 2.5868023435090963, "grad_norm": 0.05786514654755592, "learning_rate": 0.01, "loss": 2.0025, "step": 25167 }, { "epoch": 2.5871106999691644, "grad_norm": 0.05359407886862755, "learning_rate": 0.01, "loss": 2.0075, "step": 25170 }, { "epoch": 2.587419056429232, "grad_norm": 0.10763208568096161, "learning_rate": 0.01, "loss": 2.0611, "step": 25173 }, { "epoch": 2.5877274128893, "grad_norm": 0.06255360692739487, "learning_rate": 0.01, "loss": 2.0173, "step": 25176 }, { "epoch": 2.588035769349368, "grad_norm": 0.0519418902695179, "learning_rate": 0.01, "loss": 2.0095, "step": 25179 }, { "epoch": 2.588344125809436, "grad_norm": 0.09810636937618256, "learning_rate": 0.01, "loss": 2.0012, "step": 25182 }, { "epoch": 2.5886524822695036, "grad_norm": 0.05091201886534691, "learning_rate": 0.01, "loss": 2.0221, "step": 25185 }, { "epoch": 2.5889608387295713, "grad_norm": 0.046215660870075226, "learning_rate": 0.01, "loss": 2.0235, "step": 25188 }, { "epoch": 2.5892691951896394, "grad_norm": 0.06873856484889984, "learning_rate": 0.01, "loss": 2.0234, "step": 25191 }, { "epoch": 2.589577551649707, "grad_norm": 0.08075796812772751, "learning_rate": 0.01, "loss": 2.0399, "step": 25194 }, { "epoch": 2.5898859081097747, "grad_norm": 0.10317845642566681, "learning_rate": 0.01, "loss": 2.0087, "step": 25197 }, { "epoch": 2.590194264569843, "grad_norm": 0.07780349254608154, "learning_rate": 0.01, "loss": 2.0052, "step": 25200 }, { "epoch": 2.5905026210299105, "grad_norm": 0.0646161437034607, "learning_rate": 0.01, "loss": 2.0077, "step": 25203 }, { "epoch": 2.5908109774899786, "grad_norm": 0.10224328190088272, "learning_rate": 0.01, "loss": 2.0091, "step": 25206 }, { "epoch": 2.5911193339500462, "grad_norm": 0.0714394599199295, "learning_rate": 0.01, "loss": 2.0246, "step": 25209 }, { "epoch": 2.5914276904101143, "grad_norm": 0.06261731684207916, "learning_rate": 0.01, "loss": 1.9908, "step": 25212 }, { "epoch": 2.591736046870182, "grad_norm": 0.07271763682365417, "learning_rate": 0.01, "loss": 2.0489, "step": 25215 }, { "epoch": 2.5920444033302497, "grad_norm": 0.07044383883476257, "learning_rate": 0.01, "loss": 2.0461, "step": 25218 }, { "epoch": 2.5923527597903178, "grad_norm": 0.10808674246072769, "learning_rate": 0.01, "loss": 2.0229, "step": 25221 }, { "epoch": 2.5926611162503854, "grad_norm": 0.049697790294885635, "learning_rate": 0.01, "loss": 2.0139, "step": 25224 }, { "epoch": 2.592969472710453, "grad_norm": 0.08951854705810547, "learning_rate": 0.01, "loss": 2.0228, "step": 25227 }, { "epoch": 2.593277829170521, "grad_norm": 0.06834417581558228, "learning_rate": 0.01, "loss": 2.0362, "step": 25230 }, { "epoch": 2.593586185630589, "grad_norm": 0.037199974060058594, "learning_rate": 0.01, "loss": 1.9987, "step": 25233 }, { "epoch": 2.593894542090657, "grad_norm": 0.056284189224243164, "learning_rate": 0.01, "loss": 2.0313, "step": 25236 }, { "epoch": 2.5942028985507246, "grad_norm": 0.07686278969049454, "learning_rate": 0.01, "loss": 2.0376, "step": 25239 }, { "epoch": 2.5945112550107927, "grad_norm": 0.043646443635225296, "learning_rate": 0.01, "loss": 2.0145, "step": 25242 }, { "epoch": 2.5948196114708604, "grad_norm": 0.0594371035695076, "learning_rate": 0.01, "loss": 2.0147, "step": 25245 }, { "epoch": 2.595127967930928, "grad_norm": 0.08617819100618362, "learning_rate": 0.01, "loss": 2.0307, "step": 25248 }, { "epoch": 2.595436324390996, "grad_norm": 0.13672196865081787, "learning_rate": 0.01, "loss": 2.0515, "step": 25251 }, { "epoch": 2.595744680851064, "grad_norm": 0.062405068427324295, "learning_rate": 0.01, "loss": 2.0205, "step": 25254 }, { "epoch": 2.5960530373111315, "grad_norm": 0.042263686656951904, "learning_rate": 0.01, "loss": 2.0336, "step": 25257 }, { "epoch": 2.5963613937711996, "grad_norm": 0.04821668192744255, "learning_rate": 0.01, "loss": 2.0231, "step": 25260 }, { "epoch": 2.5966697502312672, "grad_norm": 0.048817023634910583, "learning_rate": 0.01, "loss": 2.0178, "step": 25263 }, { "epoch": 2.5969781066913353, "grad_norm": 0.05794850364327431, "learning_rate": 0.01, "loss": 2.023, "step": 25266 }, { "epoch": 2.597286463151403, "grad_norm": 0.05057196319103241, "learning_rate": 0.01, "loss": 2.0075, "step": 25269 }, { "epoch": 2.597594819611471, "grad_norm": 0.05443112552165985, "learning_rate": 0.01, "loss": 2.0381, "step": 25272 }, { "epoch": 2.5979031760715388, "grad_norm": 0.0533830001950264, "learning_rate": 0.01, "loss": 2.0201, "step": 25275 }, { "epoch": 2.5982115325316064, "grad_norm": 0.040888138115406036, "learning_rate": 0.01, "loss": 1.9956, "step": 25278 }, { "epoch": 2.5985198889916745, "grad_norm": 0.07154986262321472, "learning_rate": 0.01, "loss": 2.0285, "step": 25281 }, { "epoch": 2.598828245451742, "grad_norm": 0.17314322292804718, "learning_rate": 0.01, "loss": 2.0288, "step": 25284 }, { "epoch": 2.59913660191181, "grad_norm": 0.051602523773908615, "learning_rate": 0.01, "loss": 2.0351, "step": 25287 }, { "epoch": 2.599444958371878, "grad_norm": 0.047731827944517136, "learning_rate": 0.01, "loss": 2.0435, "step": 25290 }, { "epoch": 2.5997533148319456, "grad_norm": 0.0334257036447525, "learning_rate": 0.01, "loss": 2.0074, "step": 25293 }, { "epoch": 2.6000616712920137, "grad_norm": 0.03708617389202118, "learning_rate": 0.01, "loss": 1.9933, "step": 25296 }, { "epoch": 2.6003700277520814, "grad_norm": 0.08540193736553192, "learning_rate": 0.01, "loss": 2.0085, "step": 25299 }, { "epoch": 2.6006783842121495, "grad_norm": 0.1036924496293068, "learning_rate": 0.01, "loss": 2.0238, "step": 25302 }, { "epoch": 2.600986740672217, "grad_norm": 0.056603506207466125, "learning_rate": 0.01, "loss": 2.0161, "step": 25305 }, { "epoch": 2.601295097132285, "grad_norm": 0.1030723974108696, "learning_rate": 0.01, "loss": 2.0414, "step": 25308 }, { "epoch": 2.601603453592353, "grad_norm": 0.060525115579366684, "learning_rate": 0.01, "loss": 2.0083, "step": 25311 }, { "epoch": 2.6019118100524206, "grad_norm": 0.061082128435373306, "learning_rate": 0.01, "loss": 1.987, "step": 25314 }, { "epoch": 2.602220166512488, "grad_norm": 0.045477550476789474, "learning_rate": 0.01, "loss": 2.0251, "step": 25317 }, { "epoch": 2.6025285229725563, "grad_norm": 0.03306104615330696, "learning_rate": 0.01, "loss": 2.0359, "step": 25320 }, { "epoch": 2.602836879432624, "grad_norm": 0.052543554455041885, "learning_rate": 0.01, "loss": 2.0454, "step": 25323 }, { "epoch": 2.603145235892692, "grad_norm": 0.04408182203769684, "learning_rate": 0.01, "loss": 2.0375, "step": 25326 }, { "epoch": 2.6034535923527597, "grad_norm": 0.05216488614678383, "learning_rate": 0.01, "loss": 2.0331, "step": 25329 }, { "epoch": 2.603761948812828, "grad_norm": 0.12084914743900299, "learning_rate": 0.01, "loss": 2.0052, "step": 25332 }, { "epoch": 2.6040703052728955, "grad_norm": 0.09642963856458664, "learning_rate": 0.01, "loss": 2.0161, "step": 25335 }, { "epoch": 2.604378661732963, "grad_norm": 0.06409110128879547, "learning_rate": 0.01, "loss": 2.0052, "step": 25338 }, { "epoch": 2.6046870181930313, "grad_norm": 0.07277770340442657, "learning_rate": 0.01, "loss": 2.0241, "step": 25341 }, { "epoch": 2.604995374653099, "grad_norm": 0.049252595752477646, "learning_rate": 0.01, "loss": 2.0172, "step": 25344 }, { "epoch": 2.6053037311131666, "grad_norm": 0.0495469830930233, "learning_rate": 0.01, "loss": 2.0195, "step": 25347 }, { "epoch": 2.6056120875732347, "grad_norm": 0.06318475306034088, "learning_rate": 0.01, "loss": 1.9937, "step": 25350 }, { "epoch": 2.6059204440333024, "grad_norm": 0.07843147218227386, "learning_rate": 0.01, "loss": 2.0333, "step": 25353 }, { "epoch": 2.6062288004933705, "grad_norm": 0.055239688605070114, "learning_rate": 0.01, "loss": 2.0308, "step": 25356 }, { "epoch": 2.606537156953438, "grad_norm": 0.03876148536801338, "learning_rate": 0.01, "loss": 2.0227, "step": 25359 }, { "epoch": 2.6068455134135062, "grad_norm": 0.12309783697128296, "learning_rate": 0.01, "loss": 2.0233, "step": 25362 }, { "epoch": 2.607153869873574, "grad_norm": 0.09926038980484009, "learning_rate": 0.01, "loss": 2.0305, "step": 25365 }, { "epoch": 2.6074622263336416, "grad_norm": 0.07237336784601212, "learning_rate": 0.01, "loss": 1.9929, "step": 25368 }, { "epoch": 2.6077705827937097, "grad_norm": 0.09653117507696152, "learning_rate": 0.01, "loss": 2.0547, "step": 25371 }, { "epoch": 2.6080789392537773, "grad_norm": 0.0454515665769577, "learning_rate": 0.01, "loss": 2.0324, "step": 25374 }, { "epoch": 2.608387295713845, "grad_norm": 0.04541772976517677, "learning_rate": 0.01, "loss": 2.0301, "step": 25377 }, { "epoch": 2.608695652173913, "grad_norm": 0.05721856653690338, "learning_rate": 0.01, "loss": 2.011, "step": 25380 }, { "epoch": 2.6090040086339807, "grad_norm": 0.0526764839887619, "learning_rate": 0.01, "loss": 2.0176, "step": 25383 }, { "epoch": 2.609312365094049, "grad_norm": 0.08415975421667099, "learning_rate": 0.01, "loss": 1.9948, "step": 25386 }, { "epoch": 2.6096207215541165, "grad_norm": 0.07950541377067566, "learning_rate": 0.01, "loss": 2.0285, "step": 25389 }, { "epoch": 2.6099290780141846, "grad_norm": 0.07684530317783356, "learning_rate": 0.01, "loss": 2.0283, "step": 25392 }, { "epoch": 2.6102374344742523, "grad_norm": 0.0458965003490448, "learning_rate": 0.01, "loss": 2.0335, "step": 25395 }, { "epoch": 2.61054579093432, "grad_norm": 0.11776190251111984, "learning_rate": 0.01, "loss": 2.0117, "step": 25398 }, { "epoch": 2.610854147394388, "grad_norm": 0.03954809904098511, "learning_rate": 0.01, "loss": 2.0352, "step": 25401 }, { "epoch": 2.6111625038544557, "grad_norm": 0.08056820929050446, "learning_rate": 0.01, "loss": 2.0338, "step": 25404 }, { "epoch": 2.6114708603145234, "grad_norm": 0.03288201987743378, "learning_rate": 0.01, "loss": 2.0067, "step": 25407 }, { "epoch": 2.6117792167745915, "grad_norm": 0.06156465783715248, "learning_rate": 0.01, "loss": 2.0455, "step": 25410 }, { "epoch": 2.612087573234659, "grad_norm": 0.04141581431031227, "learning_rate": 0.01, "loss": 1.9822, "step": 25413 }, { "epoch": 2.6123959296947272, "grad_norm": 0.06731928139925003, "learning_rate": 0.01, "loss": 2.028, "step": 25416 }, { "epoch": 2.612704286154795, "grad_norm": 0.07682816684246063, "learning_rate": 0.01, "loss": 2.0155, "step": 25419 }, { "epoch": 2.613012642614863, "grad_norm": 0.10766996443271637, "learning_rate": 0.01, "loss": 1.9897, "step": 25422 }, { "epoch": 2.6133209990749307, "grad_norm": 0.11409672349691391, "learning_rate": 0.01, "loss": 1.9823, "step": 25425 }, { "epoch": 2.6136293555349983, "grad_norm": 0.07693130522966385, "learning_rate": 0.01, "loss": 2.02, "step": 25428 }, { "epoch": 2.6139377119950664, "grad_norm": 0.034606434404850006, "learning_rate": 0.01, "loss": 2.043, "step": 25431 }, { "epoch": 2.614246068455134, "grad_norm": 0.0957694724202156, "learning_rate": 0.01, "loss": 2.0527, "step": 25434 }, { "epoch": 2.6145544249152017, "grad_norm": 0.05739649757742882, "learning_rate": 0.01, "loss": 2.0209, "step": 25437 }, { "epoch": 2.61486278137527, "grad_norm": 0.05702357366681099, "learning_rate": 0.01, "loss": 2.0321, "step": 25440 }, { "epoch": 2.6151711378353375, "grad_norm": 0.10596863180398941, "learning_rate": 0.01, "loss": 2.023, "step": 25443 }, { "epoch": 2.6154794942954056, "grad_norm": 0.07135487347841263, "learning_rate": 0.01, "loss": 2.0192, "step": 25446 }, { "epoch": 2.6157878507554733, "grad_norm": 0.04034152254462242, "learning_rate": 0.01, "loss": 2.034, "step": 25449 }, { "epoch": 2.6160962072155414, "grad_norm": 0.05510259047150612, "learning_rate": 0.01, "loss": 2.0201, "step": 25452 }, { "epoch": 2.616404563675609, "grad_norm": 0.03920494019985199, "learning_rate": 0.01, "loss": 1.9831, "step": 25455 }, { "epoch": 2.6167129201356767, "grad_norm": 0.06935703754425049, "learning_rate": 0.01, "loss": 2.0242, "step": 25458 }, { "epoch": 2.617021276595745, "grad_norm": 0.04524112120270729, "learning_rate": 0.01, "loss": 2.0415, "step": 25461 }, { "epoch": 2.6173296330558125, "grad_norm": 0.03639009967446327, "learning_rate": 0.01, "loss": 2.0086, "step": 25464 }, { "epoch": 2.61763798951588, "grad_norm": 0.0551072359085083, "learning_rate": 0.01, "loss": 2.0279, "step": 25467 }, { "epoch": 2.617946345975948, "grad_norm": 0.03943365439772606, "learning_rate": 0.01, "loss": 2.0229, "step": 25470 }, { "epoch": 2.618254702436016, "grad_norm": 0.05363466218113899, "learning_rate": 0.01, "loss": 2.0285, "step": 25473 }, { "epoch": 2.618563058896084, "grad_norm": 0.10065023601055145, "learning_rate": 0.01, "loss": 2.0307, "step": 25476 }, { "epoch": 2.6188714153561516, "grad_norm": 0.06447052210569382, "learning_rate": 0.01, "loss": 2.0375, "step": 25479 }, { "epoch": 2.6191797718162197, "grad_norm": 0.03966406360268593, "learning_rate": 0.01, "loss": 1.9923, "step": 25482 }, { "epoch": 2.6194881282762874, "grad_norm": 0.05280005559325218, "learning_rate": 0.01, "loss": 2.0184, "step": 25485 }, { "epoch": 2.619796484736355, "grad_norm": 0.1111968457698822, "learning_rate": 0.01, "loss": 1.9933, "step": 25488 }, { "epoch": 2.620104841196423, "grad_norm": 0.11483361572027206, "learning_rate": 0.01, "loss": 2.0007, "step": 25491 }, { "epoch": 2.620413197656491, "grad_norm": 0.039019446820020676, "learning_rate": 0.01, "loss": 2.0435, "step": 25494 }, { "epoch": 2.6207215541165585, "grad_norm": 0.05683600530028343, "learning_rate": 0.01, "loss": 2.0323, "step": 25497 }, { "epoch": 2.6210299105766266, "grad_norm": 0.042798321694135666, "learning_rate": 0.01, "loss": 2.0255, "step": 25500 }, { "epoch": 2.6213382670366943, "grad_norm": 0.040838126093149185, "learning_rate": 0.01, "loss": 2.0164, "step": 25503 }, { "epoch": 2.6216466234967624, "grad_norm": 0.07249750196933746, "learning_rate": 0.01, "loss": 2.0395, "step": 25506 }, { "epoch": 2.62195497995683, "grad_norm": 0.08197704702615738, "learning_rate": 0.01, "loss": 1.9932, "step": 25509 }, { "epoch": 2.622263336416898, "grad_norm": 0.11885122954845428, "learning_rate": 0.01, "loss": 2.0203, "step": 25512 }, { "epoch": 2.622571692876966, "grad_norm": 0.07574759423732758, "learning_rate": 0.01, "loss": 2.0443, "step": 25515 }, { "epoch": 2.6228800493370334, "grad_norm": 0.05106687173247337, "learning_rate": 0.01, "loss": 2.0222, "step": 25518 }, { "epoch": 2.6231884057971016, "grad_norm": 0.03381403908133507, "learning_rate": 0.01, "loss": 2.0167, "step": 25521 }, { "epoch": 2.623496762257169, "grad_norm": 0.04259166494011879, "learning_rate": 0.01, "loss": 1.9915, "step": 25524 }, { "epoch": 2.623805118717237, "grad_norm": 0.06276170909404755, "learning_rate": 0.01, "loss": 2.0116, "step": 25527 }, { "epoch": 2.624113475177305, "grad_norm": 0.05478539317846298, "learning_rate": 0.01, "loss": 2.0299, "step": 25530 }, { "epoch": 2.6244218316373726, "grad_norm": 0.05915123224258423, "learning_rate": 0.01, "loss": 2.0211, "step": 25533 }, { "epoch": 2.6247301880974407, "grad_norm": 0.09082819521427155, "learning_rate": 0.01, "loss": 2.038, "step": 25536 }, { "epoch": 2.6250385445575084, "grad_norm": 0.04604744166135788, "learning_rate": 0.01, "loss": 2.0395, "step": 25539 }, { "epoch": 2.6253469010175765, "grad_norm": 0.04548676684498787, "learning_rate": 0.01, "loss": 2.0144, "step": 25542 }, { "epoch": 2.625655257477644, "grad_norm": 0.03629077225923538, "learning_rate": 0.01, "loss": 2.0237, "step": 25545 }, { "epoch": 2.625963613937712, "grad_norm": 0.060490988194942474, "learning_rate": 0.01, "loss": 2.0049, "step": 25548 }, { "epoch": 2.62627197039778, "grad_norm": 0.12501440942287445, "learning_rate": 0.01, "loss": 2.0289, "step": 25551 }, { "epoch": 2.6265803268578476, "grad_norm": 0.052221618592739105, "learning_rate": 0.01, "loss": 2.0092, "step": 25554 }, { "epoch": 2.6268886833179153, "grad_norm": 0.08127149194478989, "learning_rate": 0.01, "loss": 2.0133, "step": 25557 }, { "epoch": 2.6271970397779834, "grad_norm": 0.13186825811862946, "learning_rate": 0.01, "loss": 2.0281, "step": 25560 }, { "epoch": 2.6275053962380515, "grad_norm": 0.10297831892967224, "learning_rate": 0.01, "loss": 2.0137, "step": 25563 }, { "epoch": 2.627813752698119, "grad_norm": 0.04737088829278946, "learning_rate": 0.01, "loss": 2.0334, "step": 25566 }, { "epoch": 2.628122109158187, "grad_norm": 0.055748652666807175, "learning_rate": 0.01, "loss": 2.0085, "step": 25569 }, { "epoch": 2.628430465618255, "grad_norm": 0.08968318998813629, "learning_rate": 0.01, "loss": 2.023, "step": 25572 }, { "epoch": 2.6287388220783225, "grad_norm": 0.04962621256709099, "learning_rate": 0.01, "loss": 2.0111, "step": 25575 }, { "epoch": 2.62904717853839, "grad_norm": 0.033645693212747574, "learning_rate": 0.01, "loss": 2.0332, "step": 25578 }, { "epoch": 2.6293555349984583, "grad_norm": 0.05864016339182854, "learning_rate": 0.01, "loss": 2.0159, "step": 25581 }, { "epoch": 2.629663891458526, "grad_norm": 0.12207403779029846, "learning_rate": 0.01, "loss": 2.0282, "step": 25584 }, { "epoch": 2.6299722479185936, "grad_norm": 0.049948643893003464, "learning_rate": 0.01, "loss": 2.0534, "step": 25587 }, { "epoch": 2.6302806043786617, "grad_norm": 0.08774693310260773, "learning_rate": 0.01, "loss": 2.0214, "step": 25590 }, { "epoch": 2.63058896083873, "grad_norm": 0.053613871335983276, "learning_rate": 0.01, "loss": 2.0171, "step": 25593 }, { "epoch": 2.6308973172987975, "grad_norm": 0.06298764050006866, "learning_rate": 0.01, "loss": 2.0197, "step": 25596 }, { "epoch": 2.631205673758865, "grad_norm": 0.03511015325784683, "learning_rate": 0.01, "loss": 2.0239, "step": 25599 }, { "epoch": 2.6315140302189333, "grad_norm": 0.09345996379852295, "learning_rate": 0.01, "loss": 2.0239, "step": 25602 }, { "epoch": 2.631822386679001, "grad_norm": 0.06202877685427666, "learning_rate": 0.01, "loss": 2.0286, "step": 25605 }, { "epoch": 2.6321307431390686, "grad_norm": 0.10231085866689682, "learning_rate": 0.01, "loss": 2.0324, "step": 25608 }, { "epoch": 2.6324390995991367, "grad_norm": 0.12403954565525055, "learning_rate": 0.01, "loss": 2.0074, "step": 25611 }, { "epoch": 2.6327474560592043, "grad_norm": 0.059275902807712555, "learning_rate": 0.01, "loss": 2.0093, "step": 25614 }, { "epoch": 2.633055812519272, "grad_norm": 0.04833563044667244, "learning_rate": 0.01, "loss": 2.0218, "step": 25617 }, { "epoch": 2.63336416897934, "grad_norm": 0.04218841344118118, "learning_rate": 0.01, "loss": 2.0132, "step": 25620 }, { "epoch": 2.633672525439408, "grad_norm": 0.1189088523387909, "learning_rate": 0.01, "loss": 2.0407, "step": 25623 }, { "epoch": 2.633980881899476, "grad_norm": 0.11460559070110321, "learning_rate": 0.01, "loss": 2.0083, "step": 25626 }, { "epoch": 2.6342892383595435, "grad_norm": 0.13970693945884705, "learning_rate": 0.01, "loss": 2.0195, "step": 25629 }, { "epoch": 2.6345975948196116, "grad_norm": 0.06081441789865494, "learning_rate": 0.01, "loss": 2.0285, "step": 25632 }, { "epoch": 2.6349059512796793, "grad_norm": 0.055472832173109055, "learning_rate": 0.01, "loss": 2.0019, "step": 25635 }, { "epoch": 2.635214307739747, "grad_norm": 0.06767648458480835, "learning_rate": 0.01, "loss": 2.0193, "step": 25638 }, { "epoch": 2.635522664199815, "grad_norm": 0.060980260372161865, "learning_rate": 0.01, "loss": 2.0137, "step": 25641 }, { "epoch": 2.6358310206598827, "grad_norm": 0.04839742183685303, "learning_rate": 0.01, "loss": 1.9982, "step": 25644 }, { "epoch": 2.6361393771199504, "grad_norm": 0.04725825786590576, "learning_rate": 0.01, "loss": 2.0176, "step": 25647 }, { "epoch": 2.6364477335800185, "grad_norm": 0.04045959562063217, "learning_rate": 0.01, "loss": 2.0455, "step": 25650 }, { "epoch": 2.6367560900400866, "grad_norm": 0.07219504565000534, "learning_rate": 0.01, "loss": 2.0391, "step": 25653 }, { "epoch": 2.6370644465001543, "grad_norm": 0.0438094437122345, "learning_rate": 0.01, "loss": 2.029, "step": 25656 }, { "epoch": 2.637372802960222, "grad_norm": 0.06555571407079697, "learning_rate": 0.01, "loss": 2.033, "step": 25659 }, { "epoch": 2.63768115942029, "grad_norm": 0.07731533795595169, "learning_rate": 0.01, "loss": 2.0432, "step": 25662 }, { "epoch": 2.6379895158803577, "grad_norm": 0.045945990830659866, "learning_rate": 0.01, "loss": 2.0308, "step": 25665 }, { "epoch": 2.6382978723404253, "grad_norm": 0.07175582647323608, "learning_rate": 0.01, "loss": 2.019, "step": 25668 }, { "epoch": 2.6386062288004934, "grad_norm": 0.07860400527715683, "learning_rate": 0.01, "loss": 2.0291, "step": 25671 }, { "epoch": 2.638914585260561, "grad_norm": 0.05635571479797363, "learning_rate": 0.01, "loss": 2.0519, "step": 25674 }, { "epoch": 2.639222941720629, "grad_norm": 0.07352261245250702, "learning_rate": 0.01, "loss": 2.0235, "step": 25677 }, { "epoch": 2.639531298180697, "grad_norm": 0.07502644509077072, "learning_rate": 0.01, "loss": 1.9877, "step": 25680 }, { "epoch": 2.639839654640765, "grad_norm": 0.05903060361742973, "learning_rate": 0.01, "loss": 2.012, "step": 25683 }, { "epoch": 2.6401480111008326, "grad_norm": 0.07454460859298706, "learning_rate": 0.01, "loss": 1.9988, "step": 25686 }, { "epoch": 2.6404563675609003, "grad_norm": 0.06615950912237167, "learning_rate": 0.01, "loss": 2.0379, "step": 25689 }, { "epoch": 2.6407647240209684, "grad_norm": 0.07897205650806427, "learning_rate": 0.01, "loss": 2.0366, "step": 25692 }, { "epoch": 2.641073080481036, "grad_norm": 0.12898573279380798, "learning_rate": 0.01, "loss": 2.0252, "step": 25695 }, { "epoch": 2.6413814369411037, "grad_norm": 0.10731782764196396, "learning_rate": 0.01, "loss": 2.0508, "step": 25698 }, { "epoch": 2.641689793401172, "grad_norm": 0.07936359196901321, "learning_rate": 0.01, "loss": 1.9993, "step": 25701 }, { "epoch": 2.6419981498612395, "grad_norm": 0.07443369179964066, "learning_rate": 0.01, "loss": 2.0586, "step": 25704 }, { "epoch": 2.6423065063213076, "grad_norm": 0.07027627527713776, "learning_rate": 0.01, "loss": 2.0375, "step": 25707 }, { "epoch": 2.6426148627813753, "grad_norm": 0.04909298196434975, "learning_rate": 0.01, "loss": 2.0234, "step": 25710 }, { "epoch": 2.6429232192414434, "grad_norm": 0.09595979005098343, "learning_rate": 0.01, "loss": 2.0072, "step": 25713 }, { "epoch": 2.643231575701511, "grad_norm": 0.07217621803283691, "learning_rate": 0.01, "loss": 2.0187, "step": 25716 }, { "epoch": 2.6435399321615787, "grad_norm": 0.10547403246164322, "learning_rate": 0.01, "loss": 2.0351, "step": 25719 }, { "epoch": 2.643848288621647, "grad_norm": 0.048883359879255295, "learning_rate": 0.01, "loss": 1.9996, "step": 25722 }, { "epoch": 2.6441566450817144, "grad_norm": 0.09399348497390747, "learning_rate": 0.01, "loss": 2.0112, "step": 25725 }, { "epoch": 2.644465001541782, "grad_norm": 0.05369775742292404, "learning_rate": 0.01, "loss": 2.0483, "step": 25728 }, { "epoch": 2.64477335800185, "grad_norm": 0.08048120886087418, "learning_rate": 0.01, "loss": 2.0279, "step": 25731 }, { "epoch": 2.645081714461918, "grad_norm": 0.07802169024944305, "learning_rate": 0.01, "loss": 2.0178, "step": 25734 }, { "epoch": 2.645390070921986, "grad_norm": 0.06137097254395485, "learning_rate": 0.01, "loss": 2.0382, "step": 25737 }, { "epoch": 2.6456984273820536, "grad_norm": 0.05498100444674492, "learning_rate": 0.01, "loss": 2.0252, "step": 25740 }, { "epoch": 2.6460067838421217, "grad_norm": 0.057225704193115234, "learning_rate": 0.01, "loss": 2.0243, "step": 25743 }, { "epoch": 2.6463151403021894, "grad_norm": 0.06727226078510284, "learning_rate": 0.01, "loss": 2.0316, "step": 25746 }, { "epoch": 2.646623496762257, "grad_norm": 0.04740385711193085, "learning_rate": 0.01, "loss": 2.0263, "step": 25749 }, { "epoch": 2.646931853222325, "grad_norm": 0.04808951914310455, "learning_rate": 0.01, "loss": 2.0063, "step": 25752 }, { "epoch": 2.647240209682393, "grad_norm": 0.04689471423625946, "learning_rate": 0.01, "loss": 2.0209, "step": 25755 }, { "epoch": 2.6475485661424605, "grad_norm": 0.10928355902433395, "learning_rate": 0.01, "loss": 1.9951, "step": 25758 }, { "epoch": 2.6478569226025286, "grad_norm": 0.054571595042943954, "learning_rate": 0.01, "loss": 2.0216, "step": 25761 }, { "epoch": 2.6481652790625962, "grad_norm": 0.0809570774435997, "learning_rate": 0.01, "loss": 2.0451, "step": 25764 }, { "epoch": 2.6484736355226643, "grad_norm": 0.10407903790473938, "learning_rate": 0.01, "loss": 2.0205, "step": 25767 }, { "epoch": 2.648781991982732, "grad_norm": 0.045180514454841614, "learning_rate": 0.01, "loss": 2.0137, "step": 25770 }, { "epoch": 2.6490903484428, "grad_norm": 0.07642797380685806, "learning_rate": 0.01, "loss": 1.9826, "step": 25773 }, { "epoch": 2.6493987049028678, "grad_norm": 0.06943316757678986, "learning_rate": 0.01, "loss": 2.0186, "step": 25776 }, { "epoch": 2.6497070613629354, "grad_norm": 0.06536037474870682, "learning_rate": 0.01, "loss": 2.008, "step": 25779 }, { "epoch": 2.6500154178230035, "grad_norm": 0.04698020964860916, "learning_rate": 0.01, "loss": 2.0149, "step": 25782 }, { "epoch": 2.650323774283071, "grad_norm": 0.05523454770445824, "learning_rate": 0.01, "loss": 2.0191, "step": 25785 }, { "epoch": 2.650632130743139, "grad_norm": 0.043647054582834244, "learning_rate": 0.01, "loss": 1.9993, "step": 25788 }, { "epoch": 2.650940487203207, "grad_norm": 0.05682402104139328, "learning_rate": 0.01, "loss": 2.0315, "step": 25791 }, { "epoch": 2.6512488436632746, "grad_norm": 0.10919704288244247, "learning_rate": 0.01, "loss": 1.9967, "step": 25794 }, { "epoch": 2.6515572001233427, "grad_norm": 0.0895003080368042, "learning_rate": 0.01, "loss": 2.0361, "step": 25797 }, { "epoch": 2.6518655565834104, "grad_norm": 0.07750852406024933, "learning_rate": 0.01, "loss": 2.0079, "step": 25800 }, { "epoch": 2.6521739130434785, "grad_norm": 0.07465649396181107, "learning_rate": 0.01, "loss": 2.0272, "step": 25803 }, { "epoch": 2.652482269503546, "grad_norm": 0.042654186487197876, "learning_rate": 0.01, "loss": 2.0386, "step": 25806 }, { "epoch": 2.652790625963614, "grad_norm": 0.04910847172141075, "learning_rate": 0.01, "loss": 2.0049, "step": 25809 }, { "epoch": 2.653098982423682, "grad_norm": 0.04584549367427826, "learning_rate": 0.01, "loss": 2.0232, "step": 25812 }, { "epoch": 2.6534073388837496, "grad_norm": 0.03400958329439163, "learning_rate": 0.01, "loss": 1.9994, "step": 25815 }, { "epoch": 2.6537156953438172, "grad_norm": 0.11867182701826096, "learning_rate": 0.01, "loss": 2.0275, "step": 25818 }, { "epoch": 2.6540240518038853, "grad_norm": 0.09303310513496399, "learning_rate": 0.01, "loss": 2.0533, "step": 25821 }, { "epoch": 2.654332408263953, "grad_norm": 0.04951944947242737, "learning_rate": 0.01, "loss": 2.0391, "step": 25824 }, { "epoch": 2.654640764724021, "grad_norm": 0.05893867462873459, "learning_rate": 0.01, "loss": 2.0109, "step": 25827 }, { "epoch": 2.6549491211840888, "grad_norm": 0.034138891845941544, "learning_rate": 0.01, "loss": 2.0101, "step": 25830 }, { "epoch": 2.655257477644157, "grad_norm": 0.039834585040807724, "learning_rate": 0.01, "loss": 2.0008, "step": 25833 }, { "epoch": 2.6555658341042245, "grad_norm": 0.10098033398389816, "learning_rate": 0.01, "loss": 2.0156, "step": 25836 }, { "epoch": 2.655874190564292, "grad_norm": 0.07205129414796829, "learning_rate": 0.01, "loss": 2.0256, "step": 25839 }, { "epoch": 2.6561825470243603, "grad_norm": 0.04370702803134918, "learning_rate": 0.01, "loss": 2.0031, "step": 25842 }, { "epoch": 2.656490903484428, "grad_norm": 0.10662799328565598, "learning_rate": 0.01, "loss": 2.0311, "step": 25845 }, { "epoch": 2.6567992599444956, "grad_norm": 0.1833692342042923, "learning_rate": 0.01, "loss": 2.0585, "step": 25848 }, { "epoch": 2.6571076164045637, "grad_norm": 0.12229418754577637, "learning_rate": 0.01, "loss": 2.0219, "step": 25851 }, { "epoch": 2.6574159728646314, "grad_norm": 0.05533694103360176, "learning_rate": 0.01, "loss": 2.026, "step": 25854 }, { "epoch": 2.6577243293246995, "grad_norm": 0.042578935623168945, "learning_rate": 0.01, "loss": 2.0247, "step": 25857 }, { "epoch": 2.658032685784767, "grad_norm": 0.034579649567604065, "learning_rate": 0.01, "loss": 2.0286, "step": 25860 }, { "epoch": 2.6583410422448353, "grad_norm": 0.052627481520175934, "learning_rate": 0.01, "loss": 1.986, "step": 25863 }, { "epoch": 2.658649398704903, "grad_norm": 0.04877294600009918, "learning_rate": 0.01, "loss": 2.0157, "step": 25866 }, { "epoch": 2.6589577551649706, "grad_norm": 0.05192401632666588, "learning_rate": 0.01, "loss": 2.0321, "step": 25869 }, { "epoch": 2.6592661116250387, "grad_norm": 0.08188097178936005, "learning_rate": 0.01, "loss": 2.0074, "step": 25872 }, { "epoch": 2.6595744680851063, "grad_norm": 0.06215586140751839, "learning_rate": 0.01, "loss": 1.9925, "step": 25875 }, { "epoch": 2.659882824545174, "grad_norm": 0.10561207681894302, "learning_rate": 0.01, "loss": 2.0233, "step": 25878 }, { "epoch": 2.660191181005242, "grad_norm": 0.08467070758342743, "learning_rate": 0.01, "loss": 2.0503, "step": 25881 }, { "epoch": 2.6604995374653098, "grad_norm": 0.05138308182358742, "learning_rate": 0.01, "loss": 2.0492, "step": 25884 }, { "epoch": 2.660807893925378, "grad_norm": 0.07387588173151016, "learning_rate": 0.01, "loss": 2.0134, "step": 25887 }, { "epoch": 2.6611162503854455, "grad_norm": 0.06682645529508591, "learning_rate": 0.01, "loss": 2.0508, "step": 25890 }, { "epoch": 2.6614246068455136, "grad_norm": 0.056953735649585724, "learning_rate": 0.01, "loss": 2.0312, "step": 25893 }, { "epoch": 2.6617329633055813, "grad_norm": 0.03709590435028076, "learning_rate": 0.01, "loss": 2.0205, "step": 25896 }, { "epoch": 2.662041319765649, "grad_norm": 0.05895649641752243, "learning_rate": 0.01, "loss": 2.028, "step": 25899 }, { "epoch": 2.662349676225717, "grad_norm": 0.08740679174661636, "learning_rate": 0.01, "loss": 2.0156, "step": 25902 }, { "epoch": 2.6626580326857847, "grad_norm": 0.04745563119649887, "learning_rate": 0.01, "loss": 2.0338, "step": 25905 }, { "epoch": 2.6629663891458524, "grad_norm": 0.08748458325862885, "learning_rate": 0.01, "loss": 2.0392, "step": 25908 }, { "epoch": 2.6632747456059205, "grad_norm": 0.04833563417196274, "learning_rate": 0.01, "loss": 2.0128, "step": 25911 }, { "epoch": 2.663583102065988, "grad_norm": 0.03873222693800926, "learning_rate": 0.01, "loss": 2.0028, "step": 25914 }, { "epoch": 2.6638914585260562, "grad_norm": 0.07129956781864166, "learning_rate": 0.01, "loss": 2.0092, "step": 25917 }, { "epoch": 2.664199814986124, "grad_norm": 0.08984299004077911, "learning_rate": 0.01, "loss": 2.0206, "step": 25920 }, { "epoch": 2.664508171446192, "grad_norm": 0.03720667213201523, "learning_rate": 0.01, "loss": 2.0018, "step": 25923 }, { "epoch": 2.6648165279062597, "grad_norm": 0.09795575588941574, "learning_rate": 0.01, "loss": 2.0373, "step": 25926 }, { "epoch": 2.6651248843663273, "grad_norm": 0.1043957769870758, "learning_rate": 0.01, "loss": 2.0313, "step": 25929 }, { "epoch": 2.6654332408263954, "grad_norm": 0.04737646505236626, "learning_rate": 0.01, "loss": 2.0313, "step": 25932 }, { "epoch": 2.665741597286463, "grad_norm": 0.04864402487874031, "learning_rate": 0.01, "loss": 2.0068, "step": 25935 }, { "epoch": 2.6660499537465308, "grad_norm": 0.0386229082942009, "learning_rate": 0.01, "loss": 2.0259, "step": 25938 }, { "epoch": 2.666358310206599, "grad_norm": 0.04127798601984978, "learning_rate": 0.01, "loss": 2.0463, "step": 25941 }, { "epoch": 2.6666666666666665, "grad_norm": 0.07537275552749634, "learning_rate": 0.01, "loss": 2.0254, "step": 25944 }, { "epoch": 2.6669750231267346, "grad_norm": 0.041533395648002625, "learning_rate": 0.01, "loss": 2.0228, "step": 25947 }, { "epoch": 2.6672833795868023, "grad_norm": 0.045062318444252014, "learning_rate": 0.01, "loss": 2.0428, "step": 25950 }, { "epoch": 2.6675917360468704, "grad_norm": 0.08328119665384293, "learning_rate": 0.01, "loss": 2.0233, "step": 25953 }, { "epoch": 2.667900092506938, "grad_norm": 0.08402698487043381, "learning_rate": 0.01, "loss": 2.0303, "step": 25956 }, { "epoch": 2.6682084489670057, "grad_norm": 0.10571663826704025, "learning_rate": 0.01, "loss": 2.0292, "step": 25959 }, { "epoch": 2.668516805427074, "grad_norm": 0.05094289034605026, "learning_rate": 0.01, "loss": 2.0278, "step": 25962 }, { "epoch": 2.6688251618871415, "grad_norm": 0.034051552414894104, "learning_rate": 0.01, "loss": 2.0265, "step": 25965 }, { "epoch": 2.669133518347209, "grad_norm": 0.05703623592853546, "learning_rate": 0.01, "loss": 2.0272, "step": 25968 }, { "epoch": 2.6694418748072772, "grad_norm": 0.10606861114501953, "learning_rate": 0.01, "loss": 2.0179, "step": 25971 }, { "epoch": 2.669750231267345, "grad_norm": 0.11793088167905807, "learning_rate": 0.01, "loss": 2.0492, "step": 25974 }, { "epoch": 2.670058587727413, "grad_norm": 0.09058693051338196, "learning_rate": 0.01, "loss": 2.0213, "step": 25977 }, { "epoch": 2.6703669441874807, "grad_norm": 0.0677378848195076, "learning_rate": 0.01, "loss": 2.0036, "step": 25980 }, { "epoch": 2.6706753006475488, "grad_norm": 0.05636313930153847, "learning_rate": 0.01, "loss": 2.0318, "step": 25983 }, { "epoch": 2.6709836571076164, "grad_norm": 0.04547611251473427, "learning_rate": 0.01, "loss": 2.0236, "step": 25986 }, { "epoch": 2.671292013567684, "grad_norm": 0.04852728173136711, "learning_rate": 0.01, "loss": 2.0125, "step": 25989 }, { "epoch": 2.671600370027752, "grad_norm": 0.04465510696172714, "learning_rate": 0.01, "loss": 1.9907, "step": 25992 }, { "epoch": 2.67190872648782, "grad_norm": 0.043789103627204895, "learning_rate": 0.01, "loss": 2.0213, "step": 25995 }, { "epoch": 2.6722170829478875, "grad_norm": 0.12845320999622345, "learning_rate": 0.01, "loss": 2.0429, "step": 25998 }, { "epoch": 2.6725254394079556, "grad_norm": 0.054881785064935684, "learning_rate": 0.01, "loss": 2.0252, "step": 26001 }, { "epoch": 2.6728337958680233, "grad_norm": 0.0845002606511116, "learning_rate": 0.01, "loss": 2.0475, "step": 26004 }, { "epoch": 2.6731421523280914, "grad_norm": 0.04782318323850632, "learning_rate": 0.01, "loss": 2.0411, "step": 26007 }, { "epoch": 2.673450508788159, "grad_norm": 0.04288490489125252, "learning_rate": 0.01, "loss": 2.0376, "step": 26010 }, { "epoch": 2.673758865248227, "grad_norm": 0.10904238373041153, "learning_rate": 0.01, "loss": 2.0316, "step": 26013 }, { "epoch": 2.674067221708295, "grad_norm": 0.08276703208684921, "learning_rate": 0.01, "loss": 2.032, "step": 26016 }, { "epoch": 2.6743755781683625, "grad_norm": 0.06076609715819359, "learning_rate": 0.01, "loss": 2.0507, "step": 26019 }, { "epoch": 2.6746839346284306, "grad_norm": 0.043946780264377594, "learning_rate": 0.01, "loss": 2.0188, "step": 26022 }, { "epoch": 2.6749922910884982, "grad_norm": 0.03716238588094711, "learning_rate": 0.01, "loss": 2.0183, "step": 26025 }, { "epoch": 2.675300647548566, "grad_norm": 0.04982864856719971, "learning_rate": 0.01, "loss": 2.0114, "step": 26028 }, { "epoch": 2.675609004008634, "grad_norm": 0.05211315676569939, "learning_rate": 0.01, "loss": 2.0305, "step": 26031 }, { "epoch": 2.6759173604687017, "grad_norm": 0.10215940326452255, "learning_rate": 0.01, "loss": 2.0365, "step": 26034 }, { "epoch": 2.6762257169287698, "grad_norm": 0.07742898166179657, "learning_rate": 0.01, "loss": 2.0173, "step": 26037 }, { "epoch": 2.6765340733888374, "grad_norm": 0.047653328627347946, "learning_rate": 0.01, "loss": 2.0198, "step": 26040 }, { "epoch": 2.6768424298489055, "grad_norm": 0.07179111242294312, "learning_rate": 0.01, "loss": 2.0283, "step": 26043 }, { "epoch": 2.677150786308973, "grad_norm": 0.044947609305381775, "learning_rate": 0.01, "loss": 2.0044, "step": 26046 }, { "epoch": 2.677459142769041, "grad_norm": 0.03128395974636078, "learning_rate": 0.01, "loss": 2.0097, "step": 26049 }, { "epoch": 2.677767499229109, "grad_norm": 0.037758342921733856, "learning_rate": 0.01, "loss": 2.0073, "step": 26052 }, { "epoch": 2.6780758556891766, "grad_norm": 0.059724219143390656, "learning_rate": 0.01, "loss": 2.0304, "step": 26055 }, { "epoch": 2.6783842121492443, "grad_norm": 0.09603296220302582, "learning_rate": 0.01, "loss": 2.0106, "step": 26058 }, { "epoch": 2.6786925686093124, "grad_norm": 0.059488292783498764, "learning_rate": 0.01, "loss": 2.0114, "step": 26061 }, { "epoch": 2.6790009250693805, "grad_norm": 0.07343467324972153, "learning_rate": 0.01, "loss": 2.0494, "step": 26064 }, { "epoch": 2.679309281529448, "grad_norm": 0.054782915860414505, "learning_rate": 0.01, "loss": 2.023, "step": 26067 }, { "epoch": 2.679617637989516, "grad_norm": 0.042636722326278687, "learning_rate": 0.01, "loss": 2.0376, "step": 26070 }, { "epoch": 2.679925994449584, "grad_norm": 0.06803017109632492, "learning_rate": 0.01, "loss": 1.9864, "step": 26073 }, { "epoch": 2.6802343509096516, "grad_norm": 0.10990459471940994, "learning_rate": 0.01, "loss": 2.0343, "step": 26076 }, { "epoch": 2.6805427073697192, "grad_norm": 0.035727545619010925, "learning_rate": 0.01, "loss": 2.0195, "step": 26079 }, { "epoch": 2.6808510638297873, "grad_norm": 0.11241263896226883, "learning_rate": 0.01, "loss": 2.038, "step": 26082 }, { "epoch": 2.681159420289855, "grad_norm": 0.06156554073095322, "learning_rate": 0.01, "loss": 2.0191, "step": 26085 }, { "epoch": 2.6814677767499226, "grad_norm": 0.0609101839363575, "learning_rate": 0.01, "loss": 1.994, "step": 26088 }, { "epoch": 2.6817761332099908, "grad_norm": 0.046859260648489, "learning_rate": 0.01, "loss": 2.0226, "step": 26091 }, { "epoch": 2.682084489670059, "grad_norm": 0.05803006887435913, "learning_rate": 0.01, "loss": 2.0347, "step": 26094 }, { "epoch": 2.6823928461301265, "grad_norm": 0.09425931423902512, "learning_rate": 0.01, "loss": 2.0073, "step": 26097 }, { "epoch": 2.682701202590194, "grad_norm": 0.07893898338079453, "learning_rate": 0.01, "loss": 2.0248, "step": 26100 }, { "epoch": 2.6830095590502623, "grad_norm": 0.044163595885038376, "learning_rate": 0.01, "loss": 2.0217, "step": 26103 }, { "epoch": 2.68331791551033, "grad_norm": 0.061135292053222656, "learning_rate": 0.01, "loss": 2.0006, "step": 26106 }, { "epoch": 2.6836262719703976, "grad_norm": 0.03806147351861, "learning_rate": 0.01, "loss": 2.0226, "step": 26109 }, { "epoch": 2.6839346284304657, "grad_norm": 0.050713907927274704, "learning_rate": 0.01, "loss": 2.0088, "step": 26112 }, { "epoch": 2.6842429848905334, "grad_norm": 0.05512484535574913, "learning_rate": 0.01, "loss": 2.038, "step": 26115 }, { "epoch": 2.684551341350601, "grad_norm": 0.048581019043922424, "learning_rate": 0.01, "loss": 2.0388, "step": 26118 }, { "epoch": 2.684859697810669, "grad_norm": 0.04121122509241104, "learning_rate": 0.01, "loss": 2.0176, "step": 26121 }, { "epoch": 2.6851680542707372, "grad_norm": 0.036977533251047134, "learning_rate": 0.01, "loss": 2.0341, "step": 26124 }, { "epoch": 2.685476410730805, "grad_norm": 0.10436423867940903, "learning_rate": 0.01, "loss": 2.0122, "step": 26127 }, { "epoch": 2.6857847671908726, "grad_norm": 0.05357207730412483, "learning_rate": 0.01, "loss": 2.0076, "step": 26130 }, { "epoch": 2.6860931236509407, "grad_norm": 0.05386963114142418, "learning_rate": 0.01, "loss": 2.0012, "step": 26133 }, { "epoch": 2.6864014801110083, "grad_norm": 0.03591128811240196, "learning_rate": 0.01, "loss": 2.0291, "step": 26136 }, { "epoch": 2.686709836571076, "grad_norm": 0.036740854382514954, "learning_rate": 0.01, "loss": 2.0142, "step": 26139 }, { "epoch": 2.687018193031144, "grad_norm": 0.05966171249747276, "learning_rate": 0.01, "loss": 2.0145, "step": 26142 }, { "epoch": 2.6873265494912117, "grad_norm": 0.10529398918151855, "learning_rate": 0.01, "loss": 2.0091, "step": 26145 }, { "epoch": 2.6876349059512794, "grad_norm": 0.07834989577531815, "learning_rate": 0.01, "loss": 2.0211, "step": 26148 }, { "epoch": 2.6879432624113475, "grad_norm": 0.10702015459537506, "learning_rate": 0.01, "loss": 2.0368, "step": 26151 }, { "epoch": 2.6882516188714156, "grad_norm": 0.04647925868630409, "learning_rate": 0.01, "loss": 2.032, "step": 26154 }, { "epoch": 2.6885599753314833, "grad_norm": 0.04523247852921486, "learning_rate": 0.01, "loss": 2.0129, "step": 26157 }, { "epoch": 2.688868331791551, "grad_norm": 0.04293215274810791, "learning_rate": 0.01, "loss": 1.9982, "step": 26160 }, { "epoch": 2.689176688251619, "grad_norm": 0.06344863772392273, "learning_rate": 0.01, "loss": 2.028, "step": 26163 }, { "epoch": 2.6894850447116867, "grad_norm": 0.047177109867334366, "learning_rate": 0.01, "loss": 2.0154, "step": 26166 }, { "epoch": 2.6897934011717544, "grad_norm": 0.09941892325878143, "learning_rate": 0.01, "loss": 2.0204, "step": 26169 }, { "epoch": 2.6901017576318225, "grad_norm": 0.04238753393292427, "learning_rate": 0.01, "loss": 2.0164, "step": 26172 }, { "epoch": 2.69041011409189, "grad_norm": 0.0550382174551487, "learning_rate": 0.01, "loss": 2.0283, "step": 26175 }, { "epoch": 2.690718470551958, "grad_norm": 0.09788551181554794, "learning_rate": 0.01, "loss": 2.0071, "step": 26178 }, { "epoch": 2.691026827012026, "grad_norm": 0.044239919632673264, "learning_rate": 0.01, "loss": 2.0163, "step": 26181 }, { "epoch": 2.691335183472094, "grad_norm": 0.10677097737789154, "learning_rate": 0.01, "loss": 2.0041, "step": 26184 }, { "epoch": 2.6916435399321617, "grad_norm": 0.04113283380866051, "learning_rate": 0.01, "loss": 2.0061, "step": 26187 }, { "epoch": 2.6919518963922293, "grad_norm": 0.05748388543725014, "learning_rate": 0.01, "loss": 2.0144, "step": 26190 }, { "epoch": 2.6922602528522974, "grad_norm": 0.04931147024035454, "learning_rate": 0.01, "loss": 1.9967, "step": 26193 }, { "epoch": 2.692568609312365, "grad_norm": 0.051582470536231995, "learning_rate": 0.01, "loss": 2.0358, "step": 26196 }, { "epoch": 2.6928769657724327, "grad_norm": 0.1054171696305275, "learning_rate": 0.01, "loss": 2.0273, "step": 26199 }, { "epoch": 2.693185322232501, "grad_norm": 0.09005532413721085, "learning_rate": 0.01, "loss": 2.0256, "step": 26202 }, { "epoch": 2.6934936786925685, "grad_norm": 0.08124802261590958, "learning_rate": 0.01, "loss": 2.0252, "step": 26205 }, { "epoch": 2.6938020351526366, "grad_norm": 0.06238120049238205, "learning_rate": 0.01, "loss": 1.9943, "step": 26208 }, { "epoch": 2.6941103916127043, "grad_norm": 0.0375673770904541, "learning_rate": 0.01, "loss": 2.0124, "step": 26211 }, { "epoch": 2.6944187480727724, "grad_norm": 0.04710889235138893, "learning_rate": 0.01, "loss": 2.0343, "step": 26214 }, { "epoch": 2.69472710453284, "grad_norm": 0.0509529784321785, "learning_rate": 0.01, "loss": 2.0015, "step": 26217 }, { "epoch": 2.6950354609929077, "grad_norm": 0.043452613055706024, "learning_rate": 0.01, "loss": 2.0079, "step": 26220 }, { "epoch": 2.695343817452976, "grad_norm": 0.07350075244903564, "learning_rate": 0.01, "loss": 2.0141, "step": 26223 }, { "epoch": 2.6956521739130435, "grad_norm": 0.07672197371721268, "learning_rate": 0.01, "loss": 2.0002, "step": 26226 }, { "epoch": 2.695960530373111, "grad_norm": 0.1255597323179245, "learning_rate": 0.01, "loss": 2.0403, "step": 26229 }, { "epoch": 2.6962688868331792, "grad_norm": 0.0636356994509697, "learning_rate": 0.01, "loss": 2.0438, "step": 26232 }, { "epoch": 2.696577243293247, "grad_norm": 0.06334753334522247, "learning_rate": 0.01, "loss": 2.0339, "step": 26235 }, { "epoch": 2.696885599753315, "grad_norm": 0.0592232346534729, "learning_rate": 0.01, "loss": 2.0223, "step": 26238 }, { "epoch": 2.6971939562133826, "grad_norm": 0.06983037292957306, "learning_rate": 0.01, "loss": 2.0418, "step": 26241 }, { "epoch": 2.6975023126734508, "grad_norm": 0.042423997074365616, "learning_rate": 0.01, "loss": 2.0219, "step": 26244 }, { "epoch": 2.6978106691335184, "grad_norm": 0.12152963131666183, "learning_rate": 0.01, "loss": 2.026, "step": 26247 }, { "epoch": 2.698119025593586, "grad_norm": 0.03922561556100845, "learning_rate": 0.01, "loss": 1.9994, "step": 26250 }, { "epoch": 2.698427382053654, "grad_norm": 0.04790705442428589, "learning_rate": 0.01, "loss": 2.0155, "step": 26253 }, { "epoch": 2.698735738513722, "grad_norm": 0.0596231147646904, "learning_rate": 0.01, "loss": 2.0317, "step": 26256 }, { "epoch": 2.6990440949737895, "grad_norm": 0.06809087842702866, "learning_rate": 0.01, "loss": 2.001, "step": 26259 }, { "epoch": 2.6993524514338576, "grad_norm": 0.08235184103250504, "learning_rate": 0.01, "loss": 2.0284, "step": 26262 }, { "epoch": 2.6996608078939253, "grad_norm": 0.07293444871902466, "learning_rate": 0.01, "loss": 2.0309, "step": 26265 }, { "epoch": 2.6999691643539934, "grad_norm": 0.04277535527944565, "learning_rate": 0.01, "loss": 2.0236, "step": 26268 }, { "epoch": 2.700277520814061, "grad_norm": 0.10841301828622818, "learning_rate": 0.01, "loss": 2.0275, "step": 26271 }, { "epoch": 2.700585877274129, "grad_norm": 0.04575572907924652, "learning_rate": 0.01, "loss": 2.0331, "step": 26274 }, { "epoch": 2.700894233734197, "grad_norm": 0.0422062948346138, "learning_rate": 0.01, "loss": 2.0441, "step": 26277 }, { "epoch": 2.7012025901942645, "grad_norm": 0.034286245703697205, "learning_rate": 0.01, "loss": 2.0178, "step": 26280 }, { "epoch": 2.7015109466543326, "grad_norm": 0.040598925203084946, "learning_rate": 0.01, "loss": 2.0192, "step": 26283 }, { "epoch": 2.7018193031144, "grad_norm": 0.20375369489192963, "learning_rate": 0.01, "loss": 1.9977, "step": 26286 }, { "epoch": 2.702127659574468, "grad_norm": 0.1908276230096817, "learning_rate": 0.01, "loss": 2.0169, "step": 26289 }, { "epoch": 2.702436016034536, "grad_norm": 0.06826306879520416, "learning_rate": 0.01, "loss": 2.0231, "step": 26292 }, { "epoch": 2.7027443724946036, "grad_norm": 0.03917758911848068, "learning_rate": 0.01, "loss": 1.9938, "step": 26295 }, { "epoch": 2.7030527289546717, "grad_norm": 0.041581716388463974, "learning_rate": 0.01, "loss": 2.0375, "step": 26298 }, { "epoch": 2.7033610854147394, "grad_norm": 0.03759411349892616, "learning_rate": 0.01, "loss": 2.0156, "step": 26301 }, { "epoch": 2.7036694418748075, "grad_norm": 0.043274398893117905, "learning_rate": 0.01, "loss": 2.025, "step": 26304 }, { "epoch": 2.703977798334875, "grad_norm": 0.03156547620892525, "learning_rate": 0.01, "loss": 2.0151, "step": 26307 }, { "epoch": 2.704286154794943, "grad_norm": 0.042134515941143036, "learning_rate": 0.01, "loss": 2.0363, "step": 26310 }, { "epoch": 2.704594511255011, "grad_norm": 0.15473207831382751, "learning_rate": 0.01, "loss": 2.0455, "step": 26313 }, { "epoch": 2.7049028677150786, "grad_norm": 0.16576655209064484, "learning_rate": 0.01, "loss": 1.9929, "step": 26316 }, { "epoch": 2.7052112241751463, "grad_norm": 0.2106962651014328, "learning_rate": 0.01, "loss": 2.0215, "step": 26319 }, { "epoch": 2.7055195806352144, "grad_norm": 0.06543057411909103, "learning_rate": 0.01, "loss": 2.0368, "step": 26322 }, { "epoch": 2.705827937095282, "grad_norm": 0.05747131630778313, "learning_rate": 0.01, "loss": 2.0092, "step": 26325 }, { "epoch": 2.70613629355535, "grad_norm": 0.03801654651761055, "learning_rate": 0.01, "loss": 1.9988, "step": 26328 }, { "epoch": 2.706444650015418, "grad_norm": 0.047034505754709244, "learning_rate": 0.01, "loss": 2.0524, "step": 26331 }, { "epoch": 2.706753006475486, "grad_norm": 0.03980104252696037, "learning_rate": 0.01, "loss": 1.9928, "step": 26334 }, { "epoch": 2.7070613629355536, "grad_norm": 0.04470902681350708, "learning_rate": 0.01, "loss": 2.0276, "step": 26337 }, { "epoch": 2.707369719395621, "grad_norm": 0.03287111967802048, "learning_rate": 0.01, "loss": 2.0186, "step": 26340 }, { "epoch": 2.7076780758556893, "grad_norm": 0.04395715892314911, "learning_rate": 0.01, "loss": 2.0066, "step": 26343 }, { "epoch": 2.707986432315757, "grad_norm": 0.06358876079320908, "learning_rate": 0.01, "loss": 2.0505, "step": 26346 }, { "epoch": 2.7082947887758246, "grad_norm": 0.03925269469618797, "learning_rate": 0.01, "loss": 2.0149, "step": 26349 }, { "epoch": 2.7086031452358927, "grad_norm": 0.16810861229896545, "learning_rate": 0.01, "loss": 2.0433, "step": 26352 }, { "epoch": 2.7089115016959604, "grad_norm": 0.09612049907445908, "learning_rate": 0.01, "loss": 2.0014, "step": 26355 }, { "epoch": 2.7092198581560285, "grad_norm": 0.048840370029211044, "learning_rate": 0.01, "loss": 2.0062, "step": 26358 }, { "epoch": 2.709528214616096, "grad_norm": 0.03775126487016678, "learning_rate": 0.01, "loss": 2.0149, "step": 26361 }, { "epoch": 2.7098365710761643, "grad_norm": 0.04588973522186279, "learning_rate": 0.01, "loss": 2.031, "step": 26364 }, { "epoch": 2.710144927536232, "grad_norm": 0.061829451471567154, "learning_rate": 0.01, "loss": 2.0076, "step": 26367 }, { "epoch": 2.7104532839962996, "grad_norm": 0.053572434931993484, "learning_rate": 0.01, "loss": 2.0167, "step": 26370 }, { "epoch": 2.7107616404563677, "grad_norm": 0.0460902638733387, "learning_rate": 0.01, "loss": 2.0515, "step": 26373 }, { "epoch": 2.7110699969164354, "grad_norm": 0.07622378319501877, "learning_rate": 0.01, "loss": 2.0229, "step": 26376 }, { "epoch": 2.711378353376503, "grad_norm": 0.04137422889471054, "learning_rate": 0.01, "loss": 1.9985, "step": 26379 }, { "epoch": 2.711686709836571, "grad_norm": 0.052519541233778, "learning_rate": 0.01, "loss": 2.0166, "step": 26382 }, { "epoch": 2.711995066296639, "grad_norm": 0.13548482954502106, "learning_rate": 0.01, "loss": 2.0221, "step": 26385 }, { "epoch": 2.712303422756707, "grad_norm": 0.05124654993414879, "learning_rate": 0.01, "loss": 2.02, "step": 26388 }, { "epoch": 2.7126117792167745, "grad_norm": 0.04233216866850853, "learning_rate": 0.01, "loss": 2.0131, "step": 26391 }, { "epoch": 2.7129201356768426, "grad_norm": 0.04341414198279381, "learning_rate": 0.01, "loss": 2.0088, "step": 26394 }, { "epoch": 2.7132284921369103, "grad_norm": 0.078862264752388, "learning_rate": 0.01, "loss": 2.0098, "step": 26397 }, { "epoch": 2.713536848596978, "grad_norm": 0.08351139724254608, "learning_rate": 0.01, "loss": 2.0078, "step": 26400 }, { "epoch": 2.713845205057046, "grad_norm": 0.10899659991264343, "learning_rate": 0.01, "loss": 2.0354, "step": 26403 }, { "epoch": 2.7141535615171137, "grad_norm": 0.11437533795833588, "learning_rate": 0.01, "loss": 2.0145, "step": 26406 }, { "epoch": 2.7144619179771814, "grad_norm": 0.04197553172707558, "learning_rate": 0.01, "loss": 2.01, "step": 26409 }, { "epoch": 2.7147702744372495, "grad_norm": 0.054637305438518524, "learning_rate": 0.01, "loss": 2.0087, "step": 26412 }, { "epoch": 2.715078630897317, "grad_norm": 0.058579228818416595, "learning_rate": 0.01, "loss": 2.0132, "step": 26415 }, { "epoch": 2.7153869873573853, "grad_norm": 0.03501029685139656, "learning_rate": 0.01, "loss": 2.0404, "step": 26418 }, { "epoch": 2.715695343817453, "grad_norm": 0.04865582287311554, "learning_rate": 0.01, "loss": 2.0221, "step": 26421 }, { "epoch": 2.716003700277521, "grad_norm": 0.08878735452890396, "learning_rate": 0.01, "loss": 2.0376, "step": 26424 }, { "epoch": 2.7163120567375887, "grad_norm": 0.06919421255588531, "learning_rate": 0.01, "loss": 2.0125, "step": 26427 }, { "epoch": 2.7166204131976563, "grad_norm": 0.05429752171039581, "learning_rate": 0.01, "loss": 2.0217, "step": 26430 }, { "epoch": 2.7169287696577245, "grad_norm": 0.10294333100318909, "learning_rate": 0.01, "loss": 2.0036, "step": 26433 }, { "epoch": 2.717237126117792, "grad_norm": 0.0625937432050705, "learning_rate": 0.01, "loss": 2.0137, "step": 26436 }, { "epoch": 2.7175454825778598, "grad_norm": 0.048780061304569244, "learning_rate": 0.01, "loss": 2.0324, "step": 26439 }, { "epoch": 2.717853839037928, "grad_norm": 0.03840133175253868, "learning_rate": 0.01, "loss": 2.0035, "step": 26442 }, { "epoch": 2.7181621954979955, "grad_norm": 0.04021105915307999, "learning_rate": 0.01, "loss": 2.0172, "step": 26445 }, { "epoch": 2.7184705519580636, "grad_norm": 0.04241623356938362, "learning_rate": 0.01, "loss": 2.0192, "step": 26448 }, { "epoch": 2.7187789084181313, "grad_norm": 0.04488721489906311, "learning_rate": 0.01, "loss": 2.0275, "step": 26451 }, { "epoch": 2.7190872648781994, "grad_norm": 0.09730737656354904, "learning_rate": 0.01, "loss": 2.0064, "step": 26454 }, { "epoch": 2.719395621338267, "grad_norm": 0.05743822455406189, "learning_rate": 0.01, "loss": 2.0294, "step": 26457 }, { "epoch": 2.7197039777983347, "grad_norm": 0.042568083852529526, "learning_rate": 0.01, "loss": 2.0172, "step": 26460 }, { "epoch": 2.720012334258403, "grad_norm": 0.050452932715415955, "learning_rate": 0.01, "loss": 2.0354, "step": 26463 }, { "epoch": 2.7203206907184705, "grad_norm": 0.04751294478774071, "learning_rate": 0.01, "loss": 2.0242, "step": 26466 }, { "epoch": 2.720629047178538, "grad_norm": 0.12711678445339203, "learning_rate": 0.01, "loss": 2.0163, "step": 26469 }, { "epoch": 2.7209374036386063, "grad_norm": 0.043656837195158005, "learning_rate": 0.01, "loss": 2.0234, "step": 26472 }, { "epoch": 2.721245760098674, "grad_norm": 0.061641763895750046, "learning_rate": 0.01, "loss": 2.0436, "step": 26475 }, { "epoch": 2.721554116558742, "grad_norm": 0.04843028262257576, "learning_rate": 0.01, "loss": 2.0422, "step": 26478 }, { "epoch": 2.7218624730188097, "grad_norm": 0.05009736493229866, "learning_rate": 0.01, "loss": 2.025, "step": 26481 }, { "epoch": 2.722170829478878, "grad_norm": 0.0330355204641819, "learning_rate": 0.01, "loss": 2.038, "step": 26484 }, { "epoch": 2.7224791859389454, "grad_norm": 0.05958884581923485, "learning_rate": 0.01, "loss": 1.9874, "step": 26487 }, { "epoch": 2.722787542399013, "grad_norm": 0.10207222402095795, "learning_rate": 0.01, "loss": 2.0332, "step": 26490 }, { "epoch": 2.723095898859081, "grad_norm": 0.06909897923469543, "learning_rate": 0.01, "loss": 2.0262, "step": 26493 }, { "epoch": 2.723404255319149, "grad_norm": 0.09485501050949097, "learning_rate": 0.01, "loss": 2.0064, "step": 26496 }, { "epoch": 2.7237126117792165, "grad_norm": 0.0501030869781971, "learning_rate": 0.01, "loss": 2.03, "step": 26499 }, { "epoch": 2.7240209682392846, "grad_norm": 0.07009368389844894, "learning_rate": 0.01, "loss": 2.0152, "step": 26502 }, { "epoch": 2.7243293246993523, "grad_norm": 0.0766390711069107, "learning_rate": 0.01, "loss": 2.0315, "step": 26505 }, { "epoch": 2.7246376811594204, "grad_norm": 0.050900235772132874, "learning_rate": 0.01, "loss": 1.9763, "step": 26508 }, { "epoch": 2.724946037619488, "grad_norm": 0.043463658541440964, "learning_rate": 0.01, "loss": 1.9971, "step": 26511 }, { "epoch": 2.725254394079556, "grad_norm": 0.07331634312868118, "learning_rate": 0.01, "loss": 2.0331, "step": 26514 }, { "epoch": 2.725562750539624, "grad_norm": 0.06863999366760254, "learning_rate": 0.01, "loss": 2.0025, "step": 26517 }, { "epoch": 2.7258711069996915, "grad_norm": 0.12374615669250488, "learning_rate": 0.01, "loss": 2.019, "step": 26520 }, { "epoch": 2.7261794634597596, "grad_norm": 0.05284014344215393, "learning_rate": 0.01, "loss": 2.0116, "step": 26523 }, { "epoch": 2.7264878199198272, "grad_norm": 0.07776612043380737, "learning_rate": 0.01, "loss": 2.0139, "step": 26526 }, { "epoch": 2.726796176379895, "grad_norm": 0.0325651615858078, "learning_rate": 0.01, "loss": 2.0118, "step": 26529 }, { "epoch": 2.727104532839963, "grad_norm": 0.03690047189593315, "learning_rate": 0.01, "loss": 2.029, "step": 26532 }, { "epoch": 2.7274128893000307, "grad_norm": 0.12211088836193085, "learning_rate": 0.01, "loss": 2.0267, "step": 26535 }, { "epoch": 2.727721245760099, "grad_norm": 0.05561113730072975, "learning_rate": 0.01, "loss": 2.0318, "step": 26538 }, { "epoch": 2.7280296022201664, "grad_norm": 0.05541878193616867, "learning_rate": 0.01, "loss": 2.0095, "step": 26541 }, { "epoch": 2.7283379586802345, "grad_norm": 0.037987880408763885, "learning_rate": 0.01, "loss": 2.0295, "step": 26544 }, { "epoch": 2.728646315140302, "grad_norm": 0.0748986229300499, "learning_rate": 0.01, "loss": 2.039, "step": 26547 }, { "epoch": 2.72895467160037, "grad_norm": 0.05377615988254547, "learning_rate": 0.01, "loss": 2.0223, "step": 26550 }, { "epoch": 2.729263028060438, "grad_norm": 0.11756724119186401, "learning_rate": 0.01, "loss": 2.0119, "step": 26553 }, { "epoch": 2.7295713845205056, "grad_norm": 0.046288661658763885, "learning_rate": 0.01, "loss": 2.0479, "step": 26556 }, { "epoch": 2.7298797409805733, "grad_norm": 0.041496362537145615, "learning_rate": 0.01, "loss": 2.0247, "step": 26559 }, { "epoch": 2.7301880974406414, "grad_norm": 0.036013491451740265, "learning_rate": 0.01, "loss": 2.0143, "step": 26562 }, { "epoch": 2.7304964539007095, "grad_norm": 0.044654857367277145, "learning_rate": 0.01, "loss": 2.0032, "step": 26565 }, { "epoch": 2.730804810360777, "grad_norm": 0.10827535390853882, "learning_rate": 0.01, "loss": 2.037, "step": 26568 }, { "epoch": 2.731113166820845, "grad_norm": 0.053766943514347076, "learning_rate": 0.01, "loss": 2.0142, "step": 26571 }, { "epoch": 2.731421523280913, "grad_norm": 0.10854397714138031, "learning_rate": 0.01, "loss": 2.0367, "step": 26574 }, { "epoch": 2.7317298797409806, "grad_norm": 0.10521306842565536, "learning_rate": 0.01, "loss": 2.0242, "step": 26577 }, { "epoch": 2.7320382362010482, "grad_norm": 0.06532178819179535, "learning_rate": 0.01, "loss": 2.0305, "step": 26580 }, { "epoch": 2.7323465926611163, "grad_norm": 0.06266357749700546, "learning_rate": 0.01, "loss": 2.0257, "step": 26583 }, { "epoch": 2.732654949121184, "grad_norm": 0.04027451574802399, "learning_rate": 0.01, "loss": 2.0301, "step": 26586 }, { "epoch": 2.7329633055812517, "grad_norm": 0.06135137006640434, "learning_rate": 0.01, "loss": 2.0116, "step": 26589 }, { "epoch": 2.7332716620413198, "grad_norm": 0.08157611638307571, "learning_rate": 0.01, "loss": 2.041, "step": 26592 }, { "epoch": 2.733580018501388, "grad_norm": 0.060507632791996, "learning_rate": 0.01, "loss": 2.0404, "step": 26595 }, { "epoch": 2.7338883749614555, "grad_norm": 0.07385814189910889, "learning_rate": 0.01, "loss": 2.0496, "step": 26598 }, { "epoch": 2.734196731421523, "grad_norm": 0.12423606961965561, "learning_rate": 0.01, "loss": 2.0281, "step": 26601 }, { "epoch": 2.7345050878815913, "grad_norm": 0.09299054741859436, "learning_rate": 0.01, "loss": 1.9909, "step": 26604 }, { "epoch": 2.734813444341659, "grad_norm": 0.07102067768573761, "learning_rate": 0.01, "loss": 2.0175, "step": 26607 }, { "epoch": 2.7351218008017266, "grad_norm": 0.036449629813432693, "learning_rate": 0.01, "loss": 2.0253, "step": 26610 }, { "epoch": 2.7354301572617947, "grad_norm": 0.11397985368967056, "learning_rate": 0.01, "loss": 2.0372, "step": 26613 }, { "epoch": 2.7357385137218624, "grad_norm": 0.0781063437461853, "learning_rate": 0.01, "loss": 2.0191, "step": 26616 }, { "epoch": 2.73604687018193, "grad_norm": 0.05996118485927582, "learning_rate": 0.01, "loss": 2.0221, "step": 26619 }, { "epoch": 2.736355226641998, "grad_norm": 0.05300947278738022, "learning_rate": 0.01, "loss": 2.032, "step": 26622 }, { "epoch": 2.7366635831020663, "grad_norm": 0.048238106071949005, "learning_rate": 0.01, "loss": 2.0335, "step": 26625 }, { "epoch": 2.736971939562134, "grad_norm": 0.09588511288166046, "learning_rate": 0.01, "loss": 2.0418, "step": 26628 }, { "epoch": 2.7372802960222016, "grad_norm": 0.04981528967618942, "learning_rate": 0.01, "loss": 2.0125, "step": 26631 }, { "epoch": 2.7375886524822697, "grad_norm": 0.06809774041175842, "learning_rate": 0.01, "loss": 2.0033, "step": 26634 }, { "epoch": 2.7378970089423373, "grad_norm": 0.08924854546785355, "learning_rate": 0.01, "loss": 2.0026, "step": 26637 }, { "epoch": 2.738205365402405, "grad_norm": 0.06717575341463089, "learning_rate": 0.01, "loss": 2.039, "step": 26640 }, { "epoch": 2.738513721862473, "grad_norm": 0.08358625322580338, "learning_rate": 0.01, "loss": 2.0165, "step": 26643 }, { "epoch": 2.7388220783225408, "grad_norm": 0.14468573033809662, "learning_rate": 0.01, "loss": 2.014, "step": 26646 }, { "epoch": 2.7391304347826084, "grad_norm": 0.0533704049885273, "learning_rate": 0.01, "loss": 2.0268, "step": 26649 }, { "epoch": 2.7394387912426765, "grad_norm": 0.052842382341623306, "learning_rate": 0.01, "loss": 2.0334, "step": 26652 }, { "epoch": 2.7397471477027446, "grad_norm": 0.04230334609746933, "learning_rate": 0.01, "loss": 2.0077, "step": 26655 }, { "epoch": 2.7400555041628123, "grad_norm": 0.1004776731133461, "learning_rate": 0.01, "loss": 2.0254, "step": 26658 }, { "epoch": 2.74036386062288, "grad_norm": 0.03724560886621475, "learning_rate": 0.01, "loss": 2.0188, "step": 26661 }, { "epoch": 2.740672217082948, "grad_norm": 0.04997468739748001, "learning_rate": 0.01, "loss": 2.0229, "step": 26664 }, { "epoch": 2.7409805735430157, "grad_norm": 0.08138549327850342, "learning_rate": 0.01, "loss": 2.0138, "step": 26667 }, { "epoch": 2.7412889300030834, "grad_norm": 0.06195824220776558, "learning_rate": 0.01, "loss": 2.0282, "step": 26670 }, { "epoch": 2.7415972864631515, "grad_norm": 0.07340802997350693, "learning_rate": 0.01, "loss": 2.0226, "step": 26673 }, { "epoch": 2.741905642923219, "grad_norm": 0.09338308125734329, "learning_rate": 0.01, "loss": 2.0374, "step": 26676 }, { "epoch": 2.742213999383287, "grad_norm": 0.03742242231965065, "learning_rate": 0.01, "loss": 2.0313, "step": 26679 }, { "epoch": 2.742522355843355, "grad_norm": 0.04461616277694702, "learning_rate": 0.01, "loss": 2.0463, "step": 26682 }, { "epoch": 2.742830712303423, "grad_norm": 0.062195319682359695, "learning_rate": 0.01, "loss": 1.9822, "step": 26685 }, { "epoch": 2.7431390687634907, "grad_norm": 0.12009324878454208, "learning_rate": 0.01, "loss": 2.003, "step": 26688 }, { "epoch": 2.7434474252235583, "grad_norm": 0.04727381840348244, "learning_rate": 0.01, "loss": 2.0264, "step": 26691 }, { "epoch": 2.7437557816836264, "grad_norm": 0.05080636590719223, "learning_rate": 0.01, "loss": 2.033, "step": 26694 }, { "epoch": 2.744064138143694, "grad_norm": 0.11046820133924484, "learning_rate": 0.01, "loss": 2.0399, "step": 26697 }, { "epoch": 2.7443724946037618, "grad_norm": 0.06709396839141846, "learning_rate": 0.01, "loss": 2.0171, "step": 26700 }, { "epoch": 2.74468085106383, "grad_norm": 0.05088644102215767, "learning_rate": 0.01, "loss": 2.0221, "step": 26703 }, { "epoch": 2.7449892075238975, "grad_norm": 0.06446198374032974, "learning_rate": 0.01, "loss": 2.0061, "step": 26706 }, { "epoch": 2.7452975639839656, "grad_norm": 0.07209669798612595, "learning_rate": 0.01, "loss": 2.0058, "step": 26709 }, { "epoch": 2.7456059204440333, "grad_norm": 0.0958387553691864, "learning_rate": 0.01, "loss": 2.0106, "step": 26712 }, { "epoch": 2.7459142769041014, "grad_norm": 0.04666278511285782, "learning_rate": 0.01, "loss": 2.0223, "step": 26715 }, { "epoch": 2.746222633364169, "grad_norm": 0.0713481456041336, "learning_rate": 0.01, "loss": 2.0357, "step": 26718 }, { "epoch": 2.7465309898242367, "grad_norm": 0.060584791004657745, "learning_rate": 0.01, "loss": 1.999, "step": 26721 }, { "epoch": 2.746839346284305, "grad_norm": 0.08711333572864532, "learning_rate": 0.01, "loss": 2.0386, "step": 26724 }, { "epoch": 2.7471477027443725, "grad_norm": 0.04297727718949318, "learning_rate": 0.01, "loss": 2.0163, "step": 26727 }, { "epoch": 2.74745605920444, "grad_norm": 0.06777355074882507, "learning_rate": 0.01, "loss": 2.0318, "step": 26730 }, { "epoch": 2.7477644156645082, "grad_norm": 0.05725346878170967, "learning_rate": 0.01, "loss": 2.0395, "step": 26733 }, { "epoch": 2.748072772124576, "grad_norm": 0.0601140521466732, "learning_rate": 0.01, "loss": 2.0176, "step": 26736 }, { "epoch": 2.748381128584644, "grad_norm": 0.04107888415455818, "learning_rate": 0.01, "loss": 1.9972, "step": 26739 }, { "epoch": 2.7486894850447117, "grad_norm": 0.03484128415584564, "learning_rate": 0.01, "loss": 2.0018, "step": 26742 }, { "epoch": 2.7489978415047798, "grad_norm": 0.05098375305533409, "learning_rate": 0.01, "loss": 1.9977, "step": 26745 }, { "epoch": 2.7493061979648474, "grad_norm": 0.1751684993505478, "learning_rate": 0.01, "loss": 2.0183, "step": 26748 }, { "epoch": 2.749614554424915, "grad_norm": 0.09155084937810898, "learning_rate": 0.01, "loss": 2.0239, "step": 26751 }, { "epoch": 2.749922910884983, "grad_norm": 0.05078737810254097, "learning_rate": 0.01, "loss": 2.0396, "step": 26754 }, { "epoch": 2.750231267345051, "grad_norm": 0.03406425565481186, "learning_rate": 0.01, "loss": 2.0378, "step": 26757 }, { "epoch": 2.7505396238051185, "grad_norm": 0.07126470655202866, "learning_rate": 0.01, "loss": 2.0391, "step": 26760 }, { "epoch": 2.7508479802651866, "grad_norm": 0.05165224149823189, "learning_rate": 0.01, "loss": 2.0454, "step": 26763 }, { "epoch": 2.7511563367252543, "grad_norm": 0.04024217650294304, "learning_rate": 0.01, "loss": 1.9988, "step": 26766 }, { "epoch": 2.7514646931853224, "grad_norm": 0.05387435853481293, "learning_rate": 0.01, "loss": 2.0263, "step": 26769 }, { "epoch": 2.75177304964539, "grad_norm": 0.11387746036052704, "learning_rate": 0.01, "loss": 2.0191, "step": 26772 }, { "epoch": 2.752081406105458, "grad_norm": 0.06504768133163452, "learning_rate": 0.01, "loss": 2.0359, "step": 26775 }, { "epoch": 2.752389762565526, "grad_norm": 0.07217580825090408, "learning_rate": 0.01, "loss": 1.9967, "step": 26778 }, { "epoch": 2.7526981190255935, "grad_norm": 0.07751470804214478, "learning_rate": 0.01, "loss": 2.0171, "step": 26781 }, { "epoch": 2.7530064754856616, "grad_norm": 0.06282947957515717, "learning_rate": 0.01, "loss": 2.0022, "step": 26784 }, { "epoch": 2.7533148319457292, "grad_norm": 0.04301406815648079, "learning_rate": 0.01, "loss": 2.0062, "step": 26787 }, { "epoch": 2.753623188405797, "grad_norm": 0.06154756247997284, "learning_rate": 0.01, "loss": 2.0212, "step": 26790 }, { "epoch": 2.753931544865865, "grad_norm": 0.04729504510760307, "learning_rate": 0.01, "loss": 2.0192, "step": 26793 }, { "epoch": 2.7542399013259327, "grad_norm": 0.06597118079662323, "learning_rate": 0.01, "loss": 2.0081, "step": 26796 }, { "epoch": 2.7545482577860008, "grad_norm": 0.050065554678440094, "learning_rate": 0.01, "loss": 2.0004, "step": 26799 }, { "epoch": 2.7548566142460684, "grad_norm": 0.048531875014305115, "learning_rate": 0.01, "loss": 2.0158, "step": 26802 }, { "epoch": 2.7551649707061365, "grad_norm": 0.04954374581575394, "learning_rate": 0.01, "loss": 2.007, "step": 26805 }, { "epoch": 2.755473327166204, "grad_norm": 0.06998781114816666, "learning_rate": 0.01, "loss": 2.0537, "step": 26808 }, { "epoch": 2.755781683626272, "grad_norm": 0.08933461457490921, "learning_rate": 0.01, "loss": 2.0058, "step": 26811 }, { "epoch": 2.75609004008634, "grad_norm": 0.06067082658410072, "learning_rate": 0.01, "loss": 2.0318, "step": 26814 }, { "epoch": 2.7563983965464076, "grad_norm": 0.07710520923137665, "learning_rate": 0.01, "loss": 2.0029, "step": 26817 }, { "epoch": 2.7567067530064753, "grad_norm": 0.05703970044851303, "learning_rate": 0.01, "loss": 2.0505, "step": 26820 }, { "epoch": 2.7570151094665434, "grad_norm": 0.1251015067100525, "learning_rate": 0.01, "loss": 2.0264, "step": 26823 }, { "epoch": 2.757323465926611, "grad_norm": 0.05942771956324577, "learning_rate": 0.01, "loss": 2.0368, "step": 26826 }, { "epoch": 2.757631822386679, "grad_norm": 0.03195277228951454, "learning_rate": 0.01, "loss": 1.9868, "step": 26829 }, { "epoch": 2.757940178846747, "grad_norm": 0.03609882667660713, "learning_rate": 0.01, "loss": 2.0208, "step": 26832 }, { "epoch": 2.758248535306815, "grad_norm": 0.04004659876227379, "learning_rate": 0.01, "loss": 1.9996, "step": 26835 }, { "epoch": 2.7585568917668826, "grad_norm": 0.06475567817687988, "learning_rate": 0.01, "loss": 2.0187, "step": 26838 }, { "epoch": 2.7588652482269502, "grad_norm": 0.04894067347049713, "learning_rate": 0.01, "loss": 2.0342, "step": 26841 }, { "epoch": 2.7591736046870183, "grad_norm": 0.11116486042737961, "learning_rate": 0.01, "loss": 2.0111, "step": 26844 }, { "epoch": 2.759481961147086, "grad_norm": 0.0832393690943718, "learning_rate": 0.01, "loss": 1.9986, "step": 26847 }, { "epoch": 2.7597903176071537, "grad_norm": 0.06433276832103729, "learning_rate": 0.01, "loss": 2.0233, "step": 26850 }, { "epoch": 2.7600986740672218, "grad_norm": 0.06255584210157394, "learning_rate": 0.01, "loss": 2.0097, "step": 26853 }, { "epoch": 2.7604070305272894, "grad_norm": 0.06610293686389923, "learning_rate": 0.01, "loss": 2.0223, "step": 26856 }, { "epoch": 2.7607153869873575, "grad_norm": 0.048567045480012894, "learning_rate": 0.01, "loss": 1.9865, "step": 26859 }, { "epoch": 2.761023743447425, "grad_norm": 0.06945166736841202, "learning_rate": 0.01, "loss": 2.0189, "step": 26862 }, { "epoch": 2.7613320999074933, "grad_norm": 0.07567547261714935, "learning_rate": 0.01, "loss": 2.0356, "step": 26865 }, { "epoch": 2.761640456367561, "grad_norm": 0.09087162464857101, "learning_rate": 0.01, "loss": 2.0395, "step": 26868 }, { "epoch": 2.7619488128276286, "grad_norm": 0.07904476672410965, "learning_rate": 0.01, "loss": 2.0028, "step": 26871 }, { "epoch": 2.7622571692876967, "grad_norm": 0.08835271000862122, "learning_rate": 0.01, "loss": 2.0364, "step": 26874 }, { "epoch": 2.7625655257477644, "grad_norm": 0.042680736631155014, "learning_rate": 0.01, "loss": 2.0206, "step": 26877 }, { "epoch": 2.762873882207832, "grad_norm": 0.034312695264816284, "learning_rate": 0.01, "loss": 2.0124, "step": 26880 }, { "epoch": 2.7631822386679, "grad_norm": 0.04073645919561386, "learning_rate": 0.01, "loss": 2.0207, "step": 26883 }, { "epoch": 2.763490595127968, "grad_norm": 0.11242496222257614, "learning_rate": 0.01, "loss": 2.0234, "step": 26886 }, { "epoch": 2.763798951588036, "grad_norm": 0.048365022987127304, "learning_rate": 0.01, "loss": 2.0223, "step": 26889 }, { "epoch": 2.7641073080481036, "grad_norm": 0.06508596241474152, "learning_rate": 0.01, "loss": 2.0067, "step": 26892 }, { "epoch": 2.7644156645081717, "grad_norm": 0.08372494578361511, "learning_rate": 0.01, "loss": 2.024, "step": 26895 }, { "epoch": 2.7647240209682393, "grad_norm": 0.08499015122652054, "learning_rate": 0.01, "loss": 2.0337, "step": 26898 }, { "epoch": 2.765032377428307, "grad_norm": 0.08214948326349258, "learning_rate": 0.01, "loss": 2.0235, "step": 26901 }, { "epoch": 2.765340733888375, "grad_norm": 0.0902111828327179, "learning_rate": 0.01, "loss": 2.0319, "step": 26904 }, { "epoch": 2.7656490903484428, "grad_norm": 0.0556274875998497, "learning_rate": 0.01, "loss": 2.0096, "step": 26907 }, { "epoch": 2.7659574468085104, "grad_norm": 0.06845957040786743, "learning_rate": 0.01, "loss": 1.9983, "step": 26910 }, { "epoch": 2.7662658032685785, "grad_norm": 0.051283106207847595, "learning_rate": 0.01, "loss": 2.009, "step": 26913 }, { "epoch": 2.766574159728646, "grad_norm": 0.03301118686795235, "learning_rate": 0.01, "loss": 1.9983, "step": 26916 }, { "epoch": 2.7668825161887143, "grad_norm": 0.05253078415989876, "learning_rate": 0.01, "loss": 2.0274, "step": 26919 }, { "epoch": 2.767190872648782, "grad_norm": 0.1089513972401619, "learning_rate": 0.01, "loss": 2.044, "step": 26922 }, { "epoch": 2.76749922910885, "grad_norm": 0.06042607128620148, "learning_rate": 0.01, "loss": 1.9767, "step": 26925 }, { "epoch": 2.7678075855689177, "grad_norm": 0.09722250699996948, "learning_rate": 0.01, "loss": 2.0136, "step": 26928 }, { "epoch": 2.7681159420289854, "grad_norm": 0.04542861133813858, "learning_rate": 0.01, "loss": 2.0, "step": 26931 }, { "epoch": 2.7684242984890535, "grad_norm": 0.14705310761928558, "learning_rate": 0.01, "loss": 2.0074, "step": 26934 }, { "epoch": 2.768732654949121, "grad_norm": 0.03334478661417961, "learning_rate": 0.01, "loss": 2.0104, "step": 26937 }, { "epoch": 2.769041011409189, "grad_norm": 0.054583244025707245, "learning_rate": 0.01, "loss": 2.0111, "step": 26940 }, { "epoch": 2.769349367869257, "grad_norm": 0.03972140699625015, "learning_rate": 0.01, "loss": 2.0327, "step": 26943 }, { "epoch": 2.7696577243293246, "grad_norm": 0.04227004200220108, "learning_rate": 0.01, "loss": 2.052, "step": 26946 }, { "epoch": 2.7699660807893927, "grad_norm": 0.035285089164972305, "learning_rate": 0.01, "loss": 2.0224, "step": 26949 }, { "epoch": 2.7702744372494603, "grad_norm": 0.08037975430488586, "learning_rate": 0.01, "loss": 2.0201, "step": 26952 }, { "epoch": 2.7705827937095284, "grad_norm": 0.06296487152576447, "learning_rate": 0.01, "loss": 2.0222, "step": 26955 }, { "epoch": 2.770891150169596, "grad_norm": 0.08724946528673172, "learning_rate": 0.01, "loss": 2.0515, "step": 26958 }, { "epoch": 2.7711995066296637, "grad_norm": 0.06455433368682861, "learning_rate": 0.01, "loss": 2.0573, "step": 26961 }, { "epoch": 2.771507863089732, "grad_norm": 0.07615622133016586, "learning_rate": 0.01, "loss": 2.0177, "step": 26964 }, { "epoch": 2.7718162195497995, "grad_norm": 0.06695935130119324, "learning_rate": 0.01, "loss": 2.031, "step": 26967 }, { "epoch": 2.772124576009867, "grad_norm": 0.04257410392165184, "learning_rate": 0.01, "loss": 2.0186, "step": 26970 }, { "epoch": 2.7724329324699353, "grad_norm": 0.1329718828201294, "learning_rate": 0.01, "loss": 2.014, "step": 26973 }, { "epoch": 2.772741288930003, "grad_norm": 0.06937066465616226, "learning_rate": 0.01, "loss": 2.0185, "step": 26976 }, { "epoch": 2.773049645390071, "grad_norm": 0.07017137110233307, "learning_rate": 0.01, "loss": 2.0066, "step": 26979 }, { "epoch": 2.7733580018501387, "grad_norm": 0.03589661046862602, "learning_rate": 0.01, "loss": 2.0338, "step": 26982 }, { "epoch": 2.773666358310207, "grad_norm": 0.04110831022262573, "learning_rate": 0.01, "loss": 2.0054, "step": 26985 }, { "epoch": 2.7739747147702745, "grad_norm": 0.06182010471820831, "learning_rate": 0.01, "loss": 2.029, "step": 26988 }, { "epoch": 2.774283071230342, "grad_norm": 0.08161917328834534, "learning_rate": 0.01, "loss": 2.0191, "step": 26991 }, { "epoch": 2.7745914276904102, "grad_norm": 0.05832149460911751, "learning_rate": 0.01, "loss": 2.0279, "step": 26994 }, { "epoch": 2.774899784150478, "grad_norm": 0.08298707008361816, "learning_rate": 0.01, "loss": 2.0351, "step": 26997 }, { "epoch": 2.7752081406105455, "grad_norm": 0.04794152081012726, "learning_rate": 0.01, "loss": 2.0158, "step": 27000 }, { "epoch": 2.7755164970706137, "grad_norm": 0.04620853811502457, "learning_rate": 0.01, "loss": 2.029, "step": 27003 }, { "epoch": 2.7758248535306813, "grad_norm": 0.045553676784038544, "learning_rate": 0.01, "loss": 1.9998, "step": 27006 }, { "epoch": 2.7761332099907494, "grad_norm": 0.058516908437013626, "learning_rate": 0.01, "loss": 2.0324, "step": 27009 }, { "epoch": 2.776441566450817, "grad_norm": 0.05661854147911072, "learning_rate": 0.01, "loss": 2.0331, "step": 27012 }, { "epoch": 2.776749922910885, "grad_norm": 0.05626550316810608, "learning_rate": 0.01, "loss": 2.016, "step": 27015 }, { "epoch": 2.777058279370953, "grad_norm": 0.08505663275718689, "learning_rate": 0.01, "loss": 2.014, "step": 27018 }, { "epoch": 2.7773666358310205, "grad_norm": 0.08449403196573257, "learning_rate": 0.01, "loss": 2.0141, "step": 27021 }, { "epoch": 2.7776749922910886, "grad_norm": 0.08090987801551819, "learning_rate": 0.01, "loss": 2.0313, "step": 27024 }, { "epoch": 2.7779833487511563, "grad_norm": 0.03180038183927536, "learning_rate": 0.01, "loss": 2.0105, "step": 27027 }, { "epoch": 2.778291705211224, "grad_norm": 0.03052234649658203, "learning_rate": 0.01, "loss": 2.0144, "step": 27030 }, { "epoch": 2.778600061671292, "grad_norm": 0.07030376046895981, "learning_rate": 0.01, "loss": 2.0101, "step": 27033 }, { "epoch": 2.7789084181313597, "grad_norm": 0.1097898781299591, "learning_rate": 0.01, "loss": 2.016, "step": 27036 }, { "epoch": 2.779216774591428, "grad_norm": 0.10938852280378342, "learning_rate": 0.01, "loss": 2.0358, "step": 27039 }, { "epoch": 2.7795251310514955, "grad_norm": 0.17036540806293488, "learning_rate": 0.01, "loss": 2.0242, "step": 27042 }, { "epoch": 2.7798334875115636, "grad_norm": 0.07512038946151733, "learning_rate": 0.01, "loss": 2.0142, "step": 27045 }, { "epoch": 2.780141843971631, "grad_norm": 0.03864987567067146, "learning_rate": 0.01, "loss": 1.993, "step": 27048 }, { "epoch": 2.780450200431699, "grad_norm": 0.07508687674999237, "learning_rate": 0.01, "loss": 2.0404, "step": 27051 }, { "epoch": 2.780758556891767, "grad_norm": 0.03866630047559738, "learning_rate": 0.01, "loss": 2.0285, "step": 27054 }, { "epoch": 2.7810669133518346, "grad_norm": 0.044196490198373795, "learning_rate": 0.01, "loss": 2.0015, "step": 27057 }, { "epoch": 2.7813752698119023, "grad_norm": 0.03751998767256737, "learning_rate": 0.01, "loss": 2.0142, "step": 27060 }, { "epoch": 2.7816836262719704, "grad_norm": 0.04259081184864044, "learning_rate": 0.01, "loss": 2.0344, "step": 27063 }, { "epoch": 2.781991982732038, "grad_norm": 0.04910355806350708, "learning_rate": 0.01, "loss": 2.0345, "step": 27066 }, { "epoch": 2.782300339192106, "grad_norm": 0.07969753444194794, "learning_rate": 0.01, "loss": 2.0147, "step": 27069 }, { "epoch": 2.782608695652174, "grad_norm": 0.04511536657810211, "learning_rate": 0.01, "loss": 2.0265, "step": 27072 }, { "epoch": 2.782917052112242, "grad_norm": 0.03906785696744919, "learning_rate": 0.01, "loss": 2.0169, "step": 27075 }, { "epoch": 2.7832254085723096, "grad_norm": 0.045391857624053955, "learning_rate": 0.01, "loss": 2.0256, "step": 27078 }, { "epoch": 2.7835337650323773, "grad_norm": 0.04607332870364189, "learning_rate": 0.01, "loss": 2.0379, "step": 27081 }, { "epoch": 2.7838421214924454, "grad_norm": 0.10543697327375412, "learning_rate": 0.01, "loss": 2.0022, "step": 27084 }, { "epoch": 2.784150477952513, "grad_norm": 0.045777421444654465, "learning_rate": 0.01, "loss": 2.0065, "step": 27087 }, { "epoch": 2.7844588344125807, "grad_norm": 0.11416932195425034, "learning_rate": 0.01, "loss": 2.0156, "step": 27090 }, { "epoch": 2.784767190872649, "grad_norm": 0.04630710557103157, "learning_rate": 0.01, "loss": 2.0109, "step": 27093 }, { "epoch": 2.785075547332717, "grad_norm": 0.1050376445055008, "learning_rate": 0.01, "loss": 2.0239, "step": 27096 }, { "epoch": 2.7853839037927846, "grad_norm": 0.04592348635196686, "learning_rate": 0.01, "loss": 2.0032, "step": 27099 }, { "epoch": 2.785692260252852, "grad_norm": 0.058079514652490616, "learning_rate": 0.01, "loss": 2.0067, "step": 27102 }, { "epoch": 2.7860006167129203, "grad_norm": 0.12133990228176117, "learning_rate": 0.01, "loss": 2.0296, "step": 27105 }, { "epoch": 2.786308973172988, "grad_norm": 0.09523560851812363, "learning_rate": 0.01, "loss": 2.0022, "step": 27108 }, { "epoch": 2.7866173296330556, "grad_norm": 0.05585847795009613, "learning_rate": 0.01, "loss": 2.0458, "step": 27111 }, { "epoch": 2.7869256860931237, "grad_norm": 0.03669681027531624, "learning_rate": 0.01, "loss": 2.0072, "step": 27114 }, { "epoch": 2.7872340425531914, "grad_norm": 0.05370241776108742, "learning_rate": 0.01, "loss": 2.0034, "step": 27117 }, { "epoch": 2.787542399013259, "grad_norm": 0.04395952448248863, "learning_rate": 0.01, "loss": 1.992, "step": 27120 }, { "epoch": 2.787850755473327, "grad_norm": 0.04012378677725792, "learning_rate": 0.01, "loss": 2.0269, "step": 27123 }, { "epoch": 2.7881591119333953, "grad_norm": 0.08143822848796844, "learning_rate": 0.01, "loss": 2.0385, "step": 27126 }, { "epoch": 2.788467468393463, "grad_norm": 0.05250631272792816, "learning_rate": 0.01, "loss": 2.0132, "step": 27129 }, { "epoch": 2.7887758248535306, "grad_norm": 0.09945302456617355, "learning_rate": 0.01, "loss": 2.0135, "step": 27132 }, { "epoch": 2.7890841813135987, "grad_norm": 0.06001584604382515, "learning_rate": 0.01, "loss": 1.9975, "step": 27135 }, { "epoch": 2.7893925377736664, "grad_norm": 0.05244366079568863, "learning_rate": 0.01, "loss": 2.0247, "step": 27138 }, { "epoch": 2.789700894233734, "grad_norm": 0.06054399535059929, "learning_rate": 0.01, "loss": 1.9925, "step": 27141 }, { "epoch": 2.790009250693802, "grad_norm": 0.10409083217382431, "learning_rate": 0.01, "loss": 2.005, "step": 27144 }, { "epoch": 2.79031760715387, "grad_norm": 0.06229739263653755, "learning_rate": 0.01, "loss": 2.0175, "step": 27147 }, { "epoch": 2.7906259636139374, "grad_norm": 0.06340862810611725, "learning_rate": 0.01, "loss": 2.0205, "step": 27150 }, { "epoch": 2.7909343200740055, "grad_norm": 0.035666175186634064, "learning_rate": 0.01, "loss": 2.0039, "step": 27153 }, { "epoch": 2.7912426765340737, "grad_norm": 0.043176453560590744, "learning_rate": 0.01, "loss": 2.0045, "step": 27156 }, { "epoch": 2.7915510329941413, "grad_norm": 0.042664237320423126, "learning_rate": 0.01, "loss": 2.0133, "step": 27159 }, { "epoch": 2.791859389454209, "grad_norm": 0.08263540267944336, "learning_rate": 0.01, "loss": 2.0327, "step": 27162 }, { "epoch": 2.792167745914277, "grad_norm": 0.05592063441872597, "learning_rate": 0.01, "loss": 2.012, "step": 27165 }, { "epoch": 2.7924761023743447, "grad_norm": 0.0740666538476944, "learning_rate": 0.01, "loss": 2.0348, "step": 27168 }, { "epoch": 2.7927844588344124, "grad_norm": 0.07478248327970505, "learning_rate": 0.01, "loss": 2.0143, "step": 27171 }, { "epoch": 2.7930928152944805, "grad_norm": 0.08470360189676285, "learning_rate": 0.01, "loss": 2.0075, "step": 27174 }, { "epoch": 2.793401171754548, "grad_norm": 0.04486660659313202, "learning_rate": 0.01, "loss": 1.9898, "step": 27177 }, { "epoch": 2.793709528214616, "grad_norm": 0.12479998171329498, "learning_rate": 0.01, "loss": 2.005, "step": 27180 }, { "epoch": 2.794017884674684, "grad_norm": 0.05652941018342972, "learning_rate": 0.01, "loss": 2.0248, "step": 27183 }, { "epoch": 2.794326241134752, "grad_norm": 0.054577797651290894, "learning_rate": 0.01, "loss": 2.0249, "step": 27186 }, { "epoch": 2.7946345975948197, "grad_norm": 0.03588878735899925, "learning_rate": 0.01, "loss": 2.0321, "step": 27189 }, { "epoch": 2.7949429540548874, "grad_norm": 0.0890214741230011, "learning_rate": 0.01, "loss": 2.0066, "step": 27192 }, { "epoch": 2.7952513105149555, "grad_norm": 0.0901033952832222, "learning_rate": 0.01, "loss": 2.0203, "step": 27195 }, { "epoch": 2.795559666975023, "grad_norm": 0.09306314587593079, "learning_rate": 0.01, "loss": 2.0337, "step": 27198 }, { "epoch": 2.7958680234350908, "grad_norm": 0.07897783815860748, "learning_rate": 0.01, "loss": 2.004, "step": 27201 }, { "epoch": 2.796176379895159, "grad_norm": 0.08055876195430756, "learning_rate": 0.01, "loss": 2.0122, "step": 27204 }, { "epoch": 2.7964847363552265, "grad_norm": 0.058927252888679504, "learning_rate": 0.01, "loss": 2.0178, "step": 27207 }, { "epoch": 2.7967930928152946, "grad_norm": 0.1303315907716751, "learning_rate": 0.01, "loss": 2.0186, "step": 27210 }, { "epoch": 2.7971014492753623, "grad_norm": 0.08321187645196915, "learning_rate": 0.01, "loss": 2.0038, "step": 27213 }, { "epoch": 2.7974098057354304, "grad_norm": 0.0715765729546547, "learning_rate": 0.01, "loss": 2.02, "step": 27216 }, { "epoch": 2.797718162195498, "grad_norm": 0.047501109540462494, "learning_rate": 0.01, "loss": 2.0185, "step": 27219 }, { "epoch": 2.7980265186555657, "grad_norm": 0.04297725111246109, "learning_rate": 0.01, "loss": 1.9893, "step": 27222 }, { "epoch": 2.798334875115634, "grad_norm": 0.08612347394227982, "learning_rate": 0.01, "loss": 2.0351, "step": 27225 }, { "epoch": 2.7986432315757015, "grad_norm": 0.06801822036504745, "learning_rate": 0.01, "loss": 2.0012, "step": 27228 }, { "epoch": 2.798951588035769, "grad_norm": 0.103379026055336, "learning_rate": 0.01, "loss": 2.0172, "step": 27231 }, { "epoch": 2.7992599444958373, "grad_norm": 0.040429919958114624, "learning_rate": 0.01, "loss": 2.0289, "step": 27234 }, { "epoch": 2.799568300955905, "grad_norm": 0.09817671030759811, "learning_rate": 0.01, "loss": 2.0055, "step": 27237 }, { "epoch": 2.799876657415973, "grad_norm": 0.06191599741578102, "learning_rate": 0.01, "loss": 2.0032, "step": 27240 }, { "epoch": 2.8001850138760407, "grad_norm": 0.07992551475763321, "learning_rate": 0.01, "loss": 2.0203, "step": 27243 }, { "epoch": 2.800493370336109, "grad_norm": 0.055606722831726074, "learning_rate": 0.01, "loss": 2.0185, "step": 27246 }, { "epoch": 2.8008017267961764, "grad_norm": 0.1322477012872696, "learning_rate": 0.01, "loss": 2.0215, "step": 27249 }, { "epoch": 2.801110083256244, "grad_norm": 0.05603817105293274, "learning_rate": 0.01, "loss": 2.039, "step": 27252 }, { "epoch": 2.801418439716312, "grad_norm": 0.06935392320156097, "learning_rate": 0.01, "loss": 2.0119, "step": 27255 }, { "epoch": 2.80172679617638, "grad_norm": 0.04515109956264496, "learning_rate": 0.01, "loss": 2.053, "step": 27258 }, { "epoch": 2.8020351526364475, "grad_norm": 0.050864629447460175, "learning_rate": 0.01, "loss": 2.0042, "step": 27261 }, { "epoch": 2.8023435090965156, "grad_norm": 0.038876548409461975, "learning_rate": 0.01, "loss": 1.9945, "step": 27264 }, { "epoch": 2.8026518655565833, "grad_norm": 0.03906438872218132, "learning_rate": 0.01, "loss": 1.9832, "step": 27267 }, { "epoch": 2.8029602220166514, "grad_norm": 0.08978519588708878, "learning_rate": 0.01, "loss": 2.0292, "step": 27270 }, { "epoch": 2.803268578476719, "grad_norm": 0.07353704422712326, "learning_rate": 0.01, "loss": 2.0117, "step": 27273 }, { "epoch": 2.803576934936787, "grad_norm": 0.0424012616276741, "learning_rate": 0.01, "loss": 1.9931, "step": 27276 }, { "epoch": 2.803885291396855, "grad_norm": 0.05939796194434166, "learning_rate": 0.01, "loss": 2.0089, "step": 27279 }, { "epoch": 2.8041936478569225, "grad_norm": 0.06859377771615982, "learning_rate": 0.01, "loss": 2.005, "step": 27282 }, { "epoch": 2.8045020043169906, "grad_norm": 0.1016155332326889, "learning_rate": 0.01, "loss": 2.0207, "step": 27285 }, { "epoch": 2.8048103607770583, "grad_norm": 0.05608060583472252, "learning_rate": 0.01, "loss": 2.0117, "step": 27288 }, { "epoch": 2.805118717237126, "grad_norm": 0.043284934014081955, "learning_rate": 0.01, "loss": 1.9931, "step": 27291 }, { "epoch": 2.805427073697194, "grad_norm": 0.04967540502548218, "learning_rate": 0.01, "loss": 2.0222, "step": 27294 }, { "epoch": 2.8057354301572617, "grad_norm": 0.040739741176366806, "learning_rate": 0.01, "loss": 2.0148, "step": 27297 }, { "epoch": 2.80604378661733, "grad_norm": 0.039170749485492706, "learning_rate": 0.01, "loss": 2.0277, "step": 27300 }, { "epoch": 2.8063521430773974, "grad_norm": 0.040820952504873276, "learning_rate": 0.01, "loss": 2.0151, "step": 27303 }, { "epoch": 2.8066604995374655, "grad_norm": 0.07061021775007248, "learning_rate": 0.01, "loss": 2.0072, "step": 27306 }, { "epoch": 2.806968855997533, "grad_norm": 0.054698627442121506, "learning_rate": 0.01, "loss": 2.0059, "step": 27309 }, { "epoch": 2.807277212457601, "grad_norm": 0.04737408831715584, "learning_rate": 0.01, "loss": 2.0109, "step": 27312 }, { "epoch": 2.807585568917669, "grad_norm": 0.11695525050163269, "learning_rate": 0.01, "loss": 2.0076, "step": 27315 }, { "epoch": 2.8078939253777366, "grad_norm": 0.0921463891863823, "learning_rate": 0.01, "loss": 2.039, "step": 27318 }, { "epoch": 2.8082022818378043, "grad_norm": 0.03874325007200241, "learning_rate": 0.01, "loss": 2.0212, "step": 27321 }, { "epoch": 2.8085106382978724, "grad_norm": 0.04040201008319855, "learning_rate": 0.01, "loss": 1.9732, "step": 27324 }, { "epoch": 2.80881899475794, "grad_norm": 0.035648833960294724, "learning_rate": 0.01, "loss": 2.0269, "step": 27327 }, { "epoch": 2.809127351218008, "grad_norm": 0.0893108919262886, "learning_rate": 0.01, "loss": 1.9982, "step": 27330 }, { "epoch": 2.809435707678076, "grad_norm": 0.1203017309308052, "learning_rate": 0.01, "loss": 2.0138, "step": 27333 }, { "epoch": 2.809744064138144, "grad_norm": 0.045630257576704025, "learning_rate": 0.01, "loss": 2.0178, "step": 27336 }, { "epoch": 2.8100524205982116, "grad_norm": 0.04097437858581543, "learning_rate": 0.01, "loss": 1.9842, "step": 27339 }, { "epoch": 2.8103607770582792, "grad_norm": 0.05476262420415878, "learning_rate": 0.01, "loss": 2.0238, "step": 27342 }, { "epoch": 2.8106691335183474, "grad_norm": 0.05577860027551651, "learning_rate": 0.01, "loss": 2.0396, "step": 27345 }, { "epoch": 2.810977489978415, "grad_norm": 0.04357774555683136, "learning_rate": 0.01, "loss": 2.0158, "step": 27348 }, { "epoch": 2.8112858464384827, "grad_norm": 0.04594416916370392, "learning_rate": 0.01, "loss": 2.0197, "step": 27351 }, { "epoch": 2.8115942028985508, "grad_norm": 0.041454486548900604, "learning_rate": 0.01, "loss": 2.014, "step": 27354 }, { "epoch": 2.8119025593586184, "grad_norm": 0.05279424414038658, "learning_rate": 0.01, "loss": 2.0261, "step": 27357 }, { "epoch": 2.8122109158186865, "grad_norm": 0.10371188074350357, "learning_rate": 0.01, "loss": 2.0333, "step": 27360 }, { "epoch": 2.812519272278754, "grad_norm": 0.053941383957862854, "learning_rate": 0.01, "loss": 2.0067, "step": 27363 }, { "epoch": 2.8128276287388223, "grad_norm": 0.12082924693822861, "learning_rate": 0.01, "loss": 2.0177, "step": 27366 }, { "epoch": 2.81313598519889, "grad_norm": 0.11066311597824097, "learning_rate": 0.01, "loss": 2.0101, "step": 27369 }, { "epoch": 2.8134443416589576, "grad_norm": 0.07547413557767868, "learning_rate": 0.01, "loss": 2.0079, "step": 27372 }, { "epoch": 2.8137526981190257, "grad_norm": 0.06772000342607498, "learning_rate": 0.01, "loss": 2.0077, "step": 27375 }, { "epoch": 2.8140610545790934, "grad_norm": 0.03906597942113876, "learning_rate": 0.01, "loss": 1.9984, "step": 27378 }, { "epoch": 2.814369411039161, "grad_norm": 0.048235367983579636, "learning_rate": 0.01, "loss": 2.0233, "step": 27381 }, { "epoch": 2.814677767499229, "grad_norm": 0.08060193806886673, "learning_rate": 0.01, "loss": 2.0238, "step": 27384 }, { "epoch": 2.814986123959297, "grad_norm": 0.06527181714773178, "learning_rate": 0.01, "loss": 1.9996, "step": 27387 }, { "epoch": 2.815294480419365, "grad_norm": 0.04199523106217384, "learning_rate": 0.01, "loss": 2.0233, "step": 27390 }, { "epoch": 2.8156028368794326, "grad_norm": 0.07436412572860718, "learning_rate": 0.01, "loss": 1.992, "step": 27393 }, { "epoch": 2.8159111933395007, "grad_norm": 0.05724874138832092, "learning_rate": 0.01, "loss": 1.9834, "step": 27396 }, { "epoch": 2.8162195497995683, "grad_norm": 0.03609538823366165, "learning_rate": 0.01, "loss": 2.0376, "step": 27399 }, { "epoch": 2.816527906259636, "grad_norm": 0.03008181042969227, "learning_rate": 0.01, "loss": 2.0129, "step": 27402 }, { "epoch": 2.816836262719704, "grad_norm": 0.0635017454624176, "learning_rate": 0.01, "loss": 2.0, "step": 27405 }, { "epoch": 2.8171446191797718, "grad_norm": 0.07800552248954773, "learning_rate": 0.01, "loss": 2.0143, "step": 27408 }, { "epoch": 2.8174529756398394, "grad_norm": 0.09289496392011642, "learning_rate": 0.01, "loss": 1.991, "step": 27411 }, { "epoch": 2.8177613320999075, "grad_norm": 0.0791633352637291, "learning_rate": 0.01, "loss": 2.033, "step": 27414 }, { "epoch": 2.818069688559975, "grad_norm": 0.06715335696935654, "learning_rate": 0.01, "loss": 2.0149, "step": 27417 }, { "epoch": 2.8183780450200433, "grad_norm": 0.0814514085650444, "learning_rate": 0.01, "loss": 2.025, "step": 27420 }, { "epoch": 2.818686401480111, "grad_norm": 0.09124473482370377, "learning_rate": 0.01, "loss": 2.0017, "step": 27423 }, { "epoch": 2.818994757940179, "grad_norm": 0.03711434826254845, "learning_rate": 0.01, "loss": 2.0231, "step": 27426 }, { "epoch": 2.8193031144002467, "grad_norm": 0.05914726108312607, "learning_rate": 0.01, "loss": 2.0047, "step": 27429 }, { "epoch": 2.8196114708603144, "grad_norm": 0.04358556494116783, "learning_rate": 0.01, "loss": 2.0211, "step": 27432 }, { "epoch": 2.8199198273203825, "grad_norm": 0.1319088488817215, "learning_rate": 0.01, "loss": 2.0186, "step": 27435 }, { "epoch": 2.82022818378045, "grad_norm": 0.07464331388473511, "learning_rate": 0.01, "loss": 2.0434, "step": 27438 }, { "epoch": 2.820536540240518, "grad_norm": 0.08253163844347, "learning_rate": 0.01, "loss": 2.0172, "step": 27441 }, { "epoch": 2.820844896700586, "grad_norm": 0.042612046003341675, "learning_rate": 0.01, "loss": 2.0227, "step": 27444 }, { "epoch": 2.8211532531606536, "grad_norm": 0.04726095870137215, "learning_rate": 0.01, "loss": 2.0301, "step": 27447 }, { "epoch": 2.8214616096207217, "grad_norm": 0.06982813775539398, "learning_rate": 0.01, "loss": 2.0424, "step": 27450 }, { "epoch": 2.8217699660807893, "grad_norm": 0.051457736641168594, "learning_rate": 0.01, "loss": 2.0424, "step": 27453 }, { "epoch": 2.8220783225408574, "grad_norm": 0.07463426142930984, "learning_rate": 0.01, "loss": 2.012, "step": 27456 }, { "epoch": 2.822386679000925, "grad_norm": 0.048826638609170914, "learning_rate": 0.01, "loss": 2.0303, "step": 27459 }, { "epoch": 2.8226950354609928, "grad_norm": 0.05443079397082329, "learning_rate": 0.01, "loss": 2.0424, "step": 27462 }, { "epoch": 2.823003391921061, "grad_norm": 0.10064379125833511, "learning_rate": 0.01, "loss": 2.0166, "step": 27465 }, { "epoch": 2.8233117483811285, "grad_norm": 0.0755578950047493, "learning_rate": 0.01, "loss": 1.9922, "step": 27468 }, { "epoch": 2.823620104841196, "grad_norm": 0.05240621045231819, "learning_rate": 0.01, "loss": 2.0092, "step": 27471 }, { "epoch": 2.8239284613012643, "grad_norm": 0.08781099319458008, "learning_rate": 0.01, "loss": 1.9954, "step": 27474 }, { "epoch": 2.824236817761332, "grad_norm": 0.04524267092347145, "learning_rate": 0.01, "loss": 2.0327, "step": 27477 }, { "epoch": 2.8245451742214, "grad_norm": 0.05119558051228523, "learning_rate": 0.01, "loss": 2.0062, "step": 27480 }, { "epoch": 2.8248535306814677, "grad_norm": 0.06969384104013443, "learning_rate": 0.01, "loss": 2.0216, "step": 27483 }, { "epoch": 2.825161887141536, "grad_norm": 0.09887497872114182, "learning_rate": 0.01, "loss": 2.0033, "step": 27486 }, { "epoch": 2.8254702436016035, "grad_norm": 0.04231324791908264, "learning_rate": 0.01, "loss": 2.0145, "step": 27489 }, { "epoch": 2.825778600061671, "grad_norm": 0.11022655665874481, "learning_rate": 0.01, "loss": 2.0218, "step": 27492 }, { "epoch": 2.8260869565217392, "grad_norm": 0.16821467876434326, "learning_rate": 0.01, "loss": 2.0137, "step": 27495 }, { "epoch": 2.826395312981807, "grad_norm": 0.13946877419948578, "learning_rate": 0.01, "loss": 2.0046, "step": 27498 }, { "epoch": 2.8267036694418746, "grad_norm": 0.07387804985046387, "learning_rate": 0.01, "loss": 2.0294, "step": 27501 }, { "epoch": 2.8270120259019427, "grad_norm": 0.06437870115041733, "learning_rate": 0.01, "loss": 2.0307, "step": 27504 }, { "epoch": 2.8273203823620103, "grad_norm": 0.04310622811317444, "learning_rate": 0.01, "loss": 2.0155, "step": 27507 }, { "epoch": 2.8276287388220784, "grad_norm": 0.04511455446481705, "learning_rate": 0.01, "loss": 2.0243, "step": 27510 }, { "epoch": 2.827937095282146, "grad_norm": 0.05232998728752136, "learning_rate": 0.01, "loss": 2.0523, "step": 27513 }, { "epoch": 2.828245451742214, "grad_norm": 0.03596142679452896, "learning_rate": 0.01, "loss": 2.0137, "step": 27516 }, { "epoch": 2.828553808202282, "grad_norm": 0.035050373524427414, "learning_rate": 0.01, "loss": 2.0141, "step": 27519 }, { "epoch": 2.8288621646623495, "grad_norm": 0.03525279834866524, "learning_rate": 0.01, "loss": 2.007, "step": 27522 }, { "epoch": 2.8291705211224176, "grad_norm": 0.1629800945520401, "learning_rate": 0.01, "loss": 2.0266, "step": 27525 }, { "epoch": 2.8294788775824853, "grad_norm": 0.06687575578689575, "learning_rate": 0.01, "loss": 2.0099, "step": 27528 }, { "epoch": 2.829787234042553, "grad_norm": 0.05308271199464798, "learning_rate": 0.01, "loss": 2.0116, "step": 27531 }, { "epoch": 2.830095590502621, "grad_norm": 0.05350314825773239, "learning_rate": 0.01, "loss": 1.9883, "step": 27534 }, { "epoch": 2.8304039469626887, "grad_norm": 0.03929050639271736, "learning_rate": 0.01, "loss": 2.024, "step": 27537 }, { "epoch": 2.830712303422757, "grad_norm": 0.037572652101516724, "learning_rate": 0.01, "loss": 2.02, "step": 27540 }, { "epoch": 2.8310206598828245, "grad_norm": 0.05316625535488129, "learning_rate": 0.01, "loss": 2.0101, "step": 27543 }, { "epoch": 2.8313290163428926, "grad_norm": 0.04561341553926468, "learning_rate": 0.01, "loss": 2.0185, "step": 27546 }, { "epoch": 2.8316373728029602, "grad_norm": 0.055242884904146194, "learning_rate": 0.01, "loss": 2.0114, "step": 27549 }, { "epoch": 2.831945729263028, "grad_norm": 0.057326629757881165, "learning_rate": 0.01, "loss": 2.0339, "step": 27552 }, { "epoch": 2.832254085723096, "grad_norm": 0.050966937094926834, "learning_rate": 0.01, "loss": 2.0338, "step": 27555 }, { "epoch": 2.8325624421831637, "grad_norm": 0.07518629729747772, "learning_rate": 0.01, "loss": 2.0217, "step": 27558 }, { "epoch": 2.8328707986432313, "grad_norm": 0.10435313731431961, "learning_rate": 0.01, "loss": 2.0082, "step": 27561 }, { "epoch": 2.8331791551032994, "grad_norm": 0.07228770107030869, "learning_rate": 0.01, "loss": 2.0041, "step": 27564 }, { "epoch": 2.833487511563367, "grad_norm": 0.06778567284345627, "learning_rate": 0.01, "loss": 2.0248, "step": 27567 }, { "epoch": 2.833795868023435, "grad_norm": 0.03698448836803436, "learning_rate": 0.01, "loss": 1.9949, "step": 27570 }, { "epoch": 2.834104224483503, "grad_norm": 0.027820177376270294, "learning_rate": 0.01, "loss": 2.0141, "step": 27573 }, { "epoch": 2.834412580943571, "grad_norm": 0.07996013760566711, "learning_rate": 0.01, "loss": 2.0249, "step": 27576 }, { "epoch": 2.8347209374036386, "grad_norm": 0.04629092290997505, "learning_rate": 0.01, "loss": 1.9933, "step": 27579 }, { "epoch": 2.8350292938637063, "grad_norm": 0.04143141210079193, "learning_rate": 0.01, "loss": 2.039, "step": 27582 }, { "epoch": 2.8353376503237744, "grad_norm": 0.12624233961105347, "learning_rate": 0.01, "loss": 2.0228, "step": 27585 }, { "epoch": 2.835646006783842, "grad_norm": 0.1815887838602066, "learning_rate": 0.01, "loss": 2.0228, "step": 27588 }, { "epoch": 2.8359543632439097, "grad_norm": 0.14360418915748596, "learning_rate": 0.01, "loss": 2.0339, "step": 27591 }, { "epoch": 2.836262719703978, "grad_norm": 0.10748742520809174, "learning_rate": 0.01, "loss": 2.0204, "step": 27594 }, { "epoch": 2.836571076164046, "grad_norm": 0.0488545261323452, "learning_rate": 0.01, "loss": 2.0278, "step": 27597 }, { "epoch": 2.8368794326241136, "grad_norm": 0.03699369728565216, "learning_rate": 0.01, "loss": 2.0013, "step": 27600 }, { "epoch": 2.8371877890841812, "grad_norm": 0.04813402146100998, "learning_rate": 0.01, "loss": 2.0025, "step": 27603 }, { "epoch": 2.8374961455442493, "grad_norm": 0.06493838876485825, "learning_rate": 0.01, "loss": 2.0502, "step": 27606 }, { "epoch": 2.837804502004317, "grad_norm": 0.05451219528913498, "learning_rate": 0.01, "loss": 1.9959, "step": 27609 }, { "epoch": 2.8381128584643847, "grad_norm": 0.04877667874097824, "learning_rate": 0.01, "loss": 2.0236, "step": 27612 }, { "epoch": 2.8384212149244528, "grad_norm": 0.05827799066901207, "learning_rate": 0.01, "loss": 2.0247, "step": 27615 }, { "epoch": 2.8387295713845204, "grad_norm": 0.040546271950006485, "learning_rate": 0.01, "loss": 2.0061, "step": 27618 }, { "epoch": 2.839037927844588, "grad_norm": 0.03764180466532707, "learning_rate": 0.01, "loss": 2.0076, "step": 27621 }, { "epoch": 2.839346284304656, "grad_norm": 0.06349320709705353, "learning_rate": 0.01, "loss": 2.0162, "step": 27624 }, { "epoch": 2.8396546407647243, "grad_norm": 0.07079531252384186, "learning_rate": 0.01, "loss": 2.0038, "step": 27627 }, { "epoch": 2.839962997224792, "grad_norm": 0.05074724927544594, "learning_rate": 0.01, "loss": 2.0329, "step": 27630 }, { "epoch": 2.8402713536848596, "grad_norm": 0.10624159872531891, "learning_rate": 0.01, "loss": 2.0454, "step": 27633 }, { "epoch": 2.8405797101449277, "grad_norm": 0.08726594597101212, "learning_rate": 0.01, "loss": 1.9982, "step": 27636 }, { "epoch": 2.8408880666049954, "grad_norm": 0.04581126943230629, "learning_rate": 0.01, "loss": 2.0092, "step": 27639 }, { "epoch": 2.841196423065063, "grad_norm": 0.042876582592725754, "learning_rate": 0.01, "loss": 1.9851, "step": 27642 }, { "epoch": 2.841504779525131, "grad_norm": 0.03951287642121315, "learning_rate": 0.01, "loss": 2.0118, "step": 27645 }, { "epoch": 2.841813135985199, "grad_norm": 0.04439757391810417, "learning_rate": 0.01, "loss": 2.0018, "step": 27648 }, { "epoch": 2.8421214924452665, "grad_norm": 0.05910231173038483, "learning_rate": 0.01, "loss": 2.025, "step": 27651 }, { "epoch": 2.8424298489053346, "grad_norm": 0.1063590720295906, "learning_rate": 0.01, "loss": 2.0129, "step": 27654 }, { "epoch": 2.8427382053654027, "grad_norm": 0.042655881494283676, "learning_rate": 0.01, "loss": 2.004, "step": 27657 }, { "epoch": 2.8430465618254703, "grad_norm": 0.037402208894491196, "learning_rate": 0.01, "loss": 2.0025, "step": 27660 }, { "epoch": 2.843354918285538, "grad_norm": 0.03912290558218956, "learning_rate": 0.01, "loss": 2.0187, "step": 27663 }, { "epoch": 2.843663274745606, "grad_norm": 0.1316000372171402, "learning_rate": 0.01, "loss": 2.0089, "step": 27666 }, { "epoch": 2.8439716312056738, "grad_norm": 0.10243986546993256, "learning_rate": 0.01, "loss": 2.0419, "step": 27669 }, { "epoch": 2.8442799876657414, "grad_norm": 0.09918151050806046, "learning_rate": 0.01, "loss": 2.0299, "step": 27672 }, { "epoch": 2.8445883441258095, "grad_norm": 0.0572928749024868, "learning_rate": 0.01, "loss": 2.0254, "step": 27675 }, { "epoch": 2.844896700585877, "grad_norm": 0.09781496226787567, "learning_rate": 0.01, "loss": 1.9992, "step": 27678 }, { "epoch": 2.845205057045945, "grad_norm": 0.04969809949398041, "learning_rate": 0.01, "loss": 2.0099, "step": 27681 }, { "epoch": 2.845513413506013, "grad_norm": 0.0894259363412857, "learning_rate": 0.01, "loss": 2.0065, "step": 27684 }, { "epoch": 2.845821769966081, "grad_norm": 0.05788953974843025, "learning_rate": 0.01, "loss": 2.0217, "step": 27687 }, { "epoch": 2.8461301264261487, "grad_norm": 0.07628165185451508, "learning_rate": 0.01, "loss": 2.0191, "step": 27690 }, { "epoch": 2.8464384828862164, "grad_norm": 0.07799988240003586, "learning_rate": 0.01, "loss": 1.9947, "step": 27693 }, { "epoch": 2.8467468393462845, "grad_norm": 0.08373581618070602, "learning_rate": 0.01, "loss": 2.0351, "step": 27696 }, { "epoch": 2.847055195806352, "grad_norm": 0.054138489067554474, "learning_rate": 0.01, "loss": 2.0127, "step": 27699 }, { "epoch": 2.84736355226642, "grad_norm": 0.03457311540842056, "learning_rate": 0.01, "loss": 2.0082, "step": 27702 }, { "epoch": 2.847671908726488, "grad_norm": 0.04888417571783066, "learning_rate": 0.01, "loss": 2.0034, "step": 27705 }, { "epoch": 2.8479802651865556, "grad_norm": 0.09924766421318054, "learning_rate": 0.01, "loss": 2.0514, "step": 27708 }, { "epoch": 2.848288621646623, "grad_norm": 0.0531487911939621, "learning_rate": 0.01, "loss": 2.01, "step": 27711 }, { "epoch": 2.8485969781066913, "grad_norm": 0.07373910397291183, "learning_rate": 0.01, "loss": 2.0177, "step": 27714 }, { "epoch": 2.8489053345667594, "grad_norm": 0.09154459089040756, "learning_rate": 0.01, "loss": 2.0375, "step": 27717 }, { "epoch": 2.849213691026827, "grad_norm": 0.04007700830698013, "learning_rate": 0.01, "loss": 2.0447, "step": 27720 }, { "epoch": 2.8495220474868947, "grad_norm": 0.06406736373901367, "learning_rate": 0.01, "loss": 2.0262, "step": 27723 }, { "epoch": 2.849830403946963, "grad_norm": 0.06983067840337753, "learning_rate": 0.01, "loss": 2.0126, "step": 27726 }, { "epoch": 2.8501387604070305, "grad_norm": 0.04832616075873375, "learning_rate": 0.01, "loss": 2.0454, "step": 27729 }, { "epoch": 2.850447116867098, "grad_norm": 0.09404818713665009, "learning_rate": 0.01, "loss": 2.0239, "step": 27732 }, { "epoch": 2.8507554733271663, "grad_norm": 0.12212470918893814, "learning_rate": 0.01, "loss": 1.9912, "step": 27735 }, { "epoch": 2.851063829787234, "grad_norm": 0.08778437972068787, "learning_rate": 0.01, "loss": 2.0503, "step": 27738 }, { "epoch": 2.851372186247302, "grad_norm": 0.08406031876802444, "learning_rate": 0.01, "loss": 2.0432, "step": 27741 }, { "epoch": 2.8516805427073697, "grad_norm": 0.07672454416751862, "learning_rate": 0.01, "loss": 1.999, "step": 27744 }, { "epoch": 2.851988899167438, "grad_norm": 0.12199164927005768, "learning_rate": 0.01, "loss": 2.011, "step": 27747 }, { "epoch": 2.8522972556275055, "grad_norm": 0.05166463926434517, "learning_rate": 0.01, "loss": 2.0053, "step": 27750 }, { "epoch": 2.852605612087573, "grad_norm": 0.11618038266897202, "learning_rate": 0.01, "loss": 1.9846, "step": 27753 }, { "epoch": 2.8529139685476412, "grad_norm": 0.06255706399679184, "learning_rate": 0.01, "loss": 2.0253, "step": 27756 }, { "epoch": 2.853222325007709, "grad_norm": 0.04340292140841484, "learning_rate": 0.01, "loss": 2.0085, "step": 27759 }, { "epoch": 2.8535306814677766, "grad_norm": 0.06595482677221298, "learning_rate": 0.01, "loss": 1.9913, "step": 27762 }, { "epoch": 2.8538390379278447, "grad_norm": 0.03980451449751854, "learning_rate": 0.01, "loss": 1.9994, "step": 27765 }, { "epoch": 2.8541473943879123, "grad_norm": 0.035016849637031555, "learning_rate": 0.01, "loss": 2.0156, "step": 27768 }, { "epoch": 2.8544557508479804, "grad_norm": 0.07189803570508957, "learning_rate": 0.01, "loss": 2.0156, "step": 27771 }, { "epoch": 2.854764107308048, "grad_norm": 0.0759616568684578, "learning_rate": 0.01, "loss": 2.0058, "step": 27774 }, { "epoch": 2.855072463768116, "grad_norm": 0.05985911190509796, "learning_rate": 0.01, "loss": 2.0115, "step": 27777 }, { "epoch": 2.855380820228184, "grad_norm": 0.0529702752828598, "learning_rate": 0.01, "loss": 2.0315, "step": 27780 }, { "epoch": 2.8556891766882515, "grad_norm": 0.046540237963199615, "learning_rate": 0.01, "loss": 2.0035, "step": 27783 }, { "epoch": 2.8559975331483196, "grad_norm": 0.04505128040909767, "learning_rate": 0.01, "loss": 2.0385, "step": 27786 }, { "epoch": 2.8563058896083873, "grad_norm": 0.041494566947221756, "learning_rate": 0.01, "loss": 2.0239, "step": 27789 }, { "epoch": 2.856614246068455, "grad_norm": 0.038743190467357635, "learning_rate": 0.01, "loss": 2.0207, "step": 27792 }, { "epoch": 2.856922602528523, "grad_norm": 0.062206536531448364, "learning_rate": 0.01, "loss": 2.0133, "step": 27795 }, { "epoch": 2.8572309589885907, "grad_norm": 0.19090187549591064, "learning_rate": 0.01, "loss": 2.0292, "step": 27798 }, { "epoch": 2.857539315448659, "grad_norm": 0.06649639457464218, "learning_rate": 0.01, "loss": 2.016, "step": 27801 }, { "epoch": 2.8578476719087265, "grad_norm": 0.07956647872924805, "learning_rate": 0.01, "loss": 1.9797, "step": 27804 }, { "epoch": 2.8581560283687946, "grad_norm": 0.050361573696136475, "learning_rate": 0.01, "loss": 1.9901, "step": 27807 }, { "epoch": 2.8584643848288622, "grad_norm": 0.036132823675870895, "learning_rate": 0.01, "loss": 2.0122, "step": 27810 }, { "epoch": 2.85877274128893, "grad_norm": 0.04948203265666962, "learning_rate": 0.01, "loss": 2.0104, "step": 27813 }, { "epoch": 2.859081097748998, "grad_norm": 0.07491700351238251, "learning_rate": 0.01, "loss": 2.01, "step": 27816 }, { "epoch": 2.8593894542090657, "grad_norm": 0.03278898447751999, "learning_rate": 0.01, "loss": 2.0224, "step": 27819 }, { "epoch": 2.8596978106691333, "grad_norm": 0.04186544567346573, "learning_rate": 0.01, "loss": 1.9922, "step": 27822 }, { "epoch": 2.8600061671292014, "grad_norm": 0.07123599201440811, "learning_rate": 0.01, "loss": 2.0019, "step": 27825 }, { "epoch": 2.860314523589269, "grad_norm": 0.038315288722515106, "learning_rate": 0.01, "loss": 1.9854, "step": 27828 }, { "epoch": 2.860622880049337, "grad_norm": 0.06678714603185654, "learning_rate": 0.01, "loss": 2.0151, "step": 27831 }, { "epoch": 2.860931236509405, "grad_norm": 0.10680954158306122, "learning_rate": 0.01, "loss": 2.0067, "step": 27834 }, { "epoch": 2.861239592969473, "grad_norm": 0.13149844110012054, "learning_rate": 0.01, "loss": 2.0266, "step": 27837 }, { "epoch": 2.8615479494295406, "grad_norm": 0.04700513929128647, "learning_rate": 0.01, "loss": 2.0548, "step": 27840 }, { "epoch": 2.8618563058896083, "grad_norm": 0.03596799075603485, "learning_rate": 0.01, "loss": 2.0206, "step": 27843 }, { "epoch": 2.8621646623496764, "grad_norm": 0.042376477271318436, "learning_rate": 0.01, "loss": 2.0363, "step": 27846 }, { "epoch": 2.862473018809744, "grad_norm": 0.054233819246292114, "learning_rate": 0.01, "loss": 2.0324, "step": 27849 }, { "epoch": 2.8627813752698117, "grad_norm": 0.04501786455512047, "learning_rate": 0.01, "loss": 2.0211, "step": 27852 }, { "epoch": 2.86308973172988, "grad_norm": 0.06443289667367935, "learning_rate": 0.01, "loss": 1.9847, "step": 27855 }, { "epoch": 2.8633980881899475, "grad_norm": 0.05772462114691734, "learning_rate": 0.01, "loss": 2.0152, "step": 27858 }, { "epoch": 2.8637064446500156, "grad_norm": 0.039729420095682144, "learning_rate": 0.01, "loss": 2.0233, "step": 27861 }, { "epoch": 2.864014801110083, "grad_norm": 0.1330452859401703, "learning_rate": 0.01, "loss": 2.0166, "step": 27864 }, { "epoch": 2.8643231575701513, "grad_norm": 0.03952759504318237, "learning_rate": 0.01, "loss": 2.0151, "step": 27867 }, { "epoch": 2.864631514030219, "grad_norm": 0.05294906720519066, "learning_rate": 0.01, "loss": 2.0078, "step": 27870 }, { "epoch": 2.8649398704902866, "grad_norm": 0.07945666462182999, "learning_rate": 0.01, "loss": 2.0237, "step": 27873 }, { "epoch": 2.8652482269503547, "grad_norm": 0.09529011696577072, "learning_rate": 0.01, "loss": 2.0017, "step": 27876 }, { "epoch": 2.8655565834104224, "grad_norm": 0.038869790732860565, "learning_rate": 0.01, "loss": 2.0051, "step": 27879 }, { "epoch": 2.86586493987049, "grad_norm": 0.0421532541513443, "learning_rate": 0.01, "loss": 2.019, "step": 27882 }, { "epoch": 2.866173296330558, "grad_norm": 0.037143412977457047, "learning_rate": 0.01, "loss": 2.0196, "step": 27885 }, { "epoch": 2.866481652790626, "grad_norm": 0.0359092615544796, "learning_rate": 0.01, "loss": 2.0076, "step": 27888 }, { "epoch": 2.866790009250694, "grad_norm": 0.04631698876619339, "learning_rate": 0.01, "loss": 2.0131, "step": 27891 }, { "epoch": 2.8670983657107616, "grad_norm": 0.06015830859541893, "learning_rate": 0.01, "loss": 2.0341, "step": 27894 }, { "epoch": 2.8674067221708297, "grad_norm": 0.09565176069736481, "learning_rate": 0.01, "loss": 2.0251, "step": 27897 }, { "epoch": 2.8677150786308974, "grad_norm": 0.05290444567799568, "learning_rate": 0.01, "loss": 2.0412, "step": 27900 }, { "epoch": 2.868023435090965, "grad_norm": 0.03383943438529968, "learning_rate": 0.01, "loss": 1.9902, "step": 27903 }, { "epoch": 2.868331791551033, "grad_norm": 0.05390051752328873, "learning_rate": 0.01, "loss": 2.0159, "step": 27906 }, { "epoch": 2.868640148011101, "grad_norm": 0.08597470074892044, "learning_rate": 0.01, "loss": 2.0308, "step": 27909 }, { "epoch": 2.8689485044711684, "grad_norm": 0.059518035501241684, "learning_rate": 0.01, "loss": 2.0169, "step": 27912 }, { "epoch": 2.8692568609312366, "grad_norm": 0.04992047697305679, "learning_rate": 0.01, "loss": 2.0267, "step": 27915 }, { "epoch": 2.869565217391304, "grad_norm": 0.04728172719478607, "learning_rate": 0.01, "loss": 2.0266, "step": 27918 }, { "epoch": 2.8698735738513723, "grad_norm": 0.038830891251564026, "learning_rate": 0.01, "loss": 2.0197, "step": 27921 }, { "epoch": 2.87018193031144, "grad_norm": 0.039699580520391464, "learning_rate": 0.01, "loss": 2.0144, "step": 27924 }, { "epoch": 2.870490286771508, "grad_norm": 0.10109611600637436, "learning_rate": 0.01, "loss": 2.0033, "step": 27927 }, { "epoch": 2.8707986432315757, "grad_norm": 0.07930929958820343, "learning_rate": 0.01, "loss": 2.0463, "step": 27930 }, { "epoch": 2.8711069996916434, "grad_norm": 0.05544453486800194, "learning_rate": 0.01, "loss": 2.0342, "step": 27933 }, { "epoch": 2.8714153561517115, "grad_norm": 0.11004924774169922, "learning_rate": 0.01, "loss": 2.0199, "step": 27936 }, { "epoch": 2.871723712611779, "grad_norm": 0.09423034638166428, "learning_rate": 0.01, "loss": 2.0232, "step": 27939 }, { "epoch": 2.872032069071847, "grad_norm": 0.05001804232597351, "learning_rate": 0.01, "loss": 2.0032, "step": 27942 }, { "epoch": 2.872340425531915, "grad_norm": 0.04487982019782066, "learning_rate": 0.01, "loss": 2.0154, "step": 27945 }, { "epoch": 2.8726487819919826, "grad_norm": 0.033674515783786774, "learning_rate": 0.01, "loss": 2.0138, "step": 27948 }, { "epoch": 2.8729571384520507, "grad_norm": 0.040895912796258926, "learning_rate": 0.01, "loss": 1.9902, "step": 27951 }, { "epoch": 2.8732654949121184, "grad_norm": 0.03655741363763809, "learning_rate": 0.01, "loss": 2.0201, "step": 27954 }, { "epoch": 2.8735738513721865, "grad_norm": 0.06042269244790077, "learning_rate": 0.01, "loss": 2.0354, "step": 27957 }, { "epoch": 2.873882207832254, "grad_norm": 0.0660943016409874, "learning_rate": 0.01, "loss": 2.0312, "step": 27960 }, { "epoch": 2.874190564292322, "grad_norm": 0.0681222453713417, "learning_rate": 0.01, "loss": 2.0059, "step": 27963 }, { "epoch": 2.87449892075239, "grad_norm": 0.05666188523173332, "learning_rate": 0.01, "loss": 2.022, "step": 27966 }, { "epoch": 2.8748072772124575, "grad_norm": 0.09911254048347473, "learning_rate": 0.01, "loss": 2.0056, "step": 27969 }, { "epoch": 2.875115633672525, "grad_norm": 0.0730089545249939, "learning_rate": 0.01, "loss": 1.9927, "step": 27972 }, { "epoch": 2.8754239901325933, "grad_norm": 0.05434833839535713, "learning_rate": 0.01, "loss": 1.9998, "step": 27975 }, { "epoch": 2.875732346592661, "grad_norm": 0.07539419084787369, "learning_rate": 0.01, "loss": 2.0304, "step": 27978 }, { "epoch": 2.876040703052729, "grad_norm": 0.04300360381603241, "learning_rate": 0.01, "loss": 2.0046, "step": 27981 }, { "epoch": 2.8763490595127967, "grad_norm": 0.1077357679605484, "learning_rate": 0.01, "loss": 2.0344, "step": 27984 }, { "epoch": 2.876657415972865, "grad_norm": 0.08229992538690567, "learning_rate": 0.01, "loss": 2.0412, "step": 27987 }, { "epoch": 2.8769657724329325, "grad_norm": 0.12404177337884903, "learning_rate": 0.01, "loss": 2.0047, "step": 27990 }, { "epoch": 2.877274128893, "grad_norm": 0.06843412667512894, "learning_rate": 0.01, "loss": 2.0144, "step": 27993 }, { "epoch": 2.8775824853530683, "grad_norm": 0.09625189751386642, "learning_rate": 0.01, "loss": 2.0435, "step": 27996 }, { "epoch": 2.877890841813136, "grad_norm": 0.04479416087269783, "learning_rate": 0.01, "loss": 2.0355, "step": 27999 }, { "epoch": 2.8781991982732036, "grad_norm": 0.04933414235711098, "learning_rate": 0.01, "loss": 2.0018, "step": 28002 }, { "epoch": 2.8785075547332717, "grad_norm": 0.05558469891548157, "learning_rate": 0.01, "loss": 2.03, "step": 28005 }, { "epoch": 2.8788159111933393, "grad_norm": 0.04813915863633156, "learning_rate": 0.01, "loss": 2.0227, "step": 28008 }, { "epoch": 2.8791242676534075, "grad_norm": 0.06402178108692169, "learning_rate": 0.01, "loss": 2.0023, "step": 28011 }, { "epoch": 2.879432624113475, "grad_norm": 0.03481597453355789, "learning_rate": 0.01, "loss": 2.032, "step": 28014 }, { "epoch": 2.879740980573543, "grad_norm": 0.03682602196931839, "learning_rate": 0.01, "loss": 2.0198, "step": 28017 }, { "epoch": 2.880049337033611, "grad_norm": 0.11671306192874908, "learning_rate": 0.01, "loss": 2.0025, "step": 28020 }, { "epoch": 2.8803576934936785, "grad_norm": 0.051900725811719894, "learning_rate": 0.01, "loss": 2.0128, "step": 28023 }, { "epoch": 2.8806660499537466, "grad_norm": 0.034372419118881226, "learning_rate": 0.01, "loss": 2.0146, "step": 28026 }, { "epoch": 2.8809744064138143, "grad_norm": 0.05165507644414902, "learning_rate": 0.01, "loss": 2.0139, "step": 28029 }, { "epoch": 2.881282762873882, "grad_norm": 0.046683549880981445, "learning_rate": 0.01, "loss": 2.0454, "step": 28032 }, { "epoch": 2.88159111933395, "grad_norm": 0.04153186455368996, "learning_rate": 0.01, "loss": 2.022, "step": 28035 }, { "epoch": 2.8818994757940177, "grad_norm": 0.07401791960000992, "learning_rate": 0.01, "loss": 2.0249, "step": 28038 }, { "epoch": 2.882207832254086, "grad_norm": 0.08125065267086029, "learning_rate": 0.01, "loss": 2.0042, "step": 28041 }, { "epoch": 2.8825161887141535, "grad_norm": 0.076799176633358, "learning_rate": 0.01, "loss": 1.9924, "step": 28044 }, { "epoch": 2.8828245451742216, "grad_norm": 0.09814302623271942, "learning_rate": 0.01, "loss": 2.028, "step": 28047 }, { "epoch": 2.8831329016342893, "grad_norm": 0.06998278200626373, "learning_rate": 0.01, "loss": 2.0036, "step": 28050 }, { "epoch": 2.883441258094357, "grad_norm": 0.057476770132780075, "learning_rate": 0.01, "loss": 2.0134, "step": 28053 }, { "epoch": 2.883749614554425, "grad_norm": 0.037239111959934235, "learning_rate": 0.01, "loss": 1.9987, "step": 28056 }, { "epoch": 2.8840579710144927, "grad_norm": 0.08055179566144943, "learning_rate": 0.01, "loss": 2.0267, "step": 28059 }, { "epoch": 2.8843663274745603, "grad_norm": 0.07050419598817825, "learning_rate": 0.01, "loss": 2.0294, "step": 28062 }, { "epoch": 2.8846746839346284, "grad_norm": 0.05187792330980301, "learning_rate": 0.01, "loss": 2.004, "step": 28065 }, { "epoch": 2.884983040394696, "grad_norm": 0.03746993839740753, "learning_rate": 0.01, "loss": 2.0219, "step": 28068 }, { "epoch": 2.885291396854764, "grad_norm": 0.09767074137926102, "learning_rate": 0.01, "loss": 2.0352, "step": 28071 }, { "epoch": 2.885599753314832, "grad_norm": 0.08471439778804779, "learning_rate": 0.01, "loss": 2.0026, "step": 28074 }, { "epoch": 2.8859081097749, "grad_norm": 0.061311863362789154, "learning_rate": 0.01, "loss": 2.0323, "step": 28077 }, { "epoch": 2.8862164662349676, "grad_norm": 0.06402043253183365, "learning_rate": 0.01, "loss": 2.0375, "step": 28080 }, { "epoch": 2.8865248226950353, "grad_norm": 0.06756860762834549, "learning_rate": 0.01, "loss": 2.0232, "step": 28083 }, { "epoch": 2.8868331791551034, "grad_norm": 0.07851870357990265, "learning_rate": 0.01, "loss": 2.0178, "step": 28086 }, { "epoch": 2.887141535615171, "grad_norm": 0.08102571219205856, "learning_rate": 0.01, "loss": 2.0162, "step": 28089 }, { "epoch": 2.8874498920752387, "grad_norm": 0.04745829850435257, "learning_rate": 0.01, "loss": 2.0267, "step": 28092 }, { "epoch": 2.887758248535307, "grad_norm": 0.04496223106980324, "learning_rate": 0.01, "loss": 2.012, "step": 28095 }, { "epoch": 2.8880666049953745, "grad_norm": 0.09044700860977173, "learning_rate": 0.01, "loss": 2.0354, "step": 28098 }, { "epoch": 2.8883749614554426, "grad_norm": 0.05591853708028793, "learning_rate": 0.01, "loss": 2.0033, "step": 28101 }, { "epoch": 2.8886833179155103, "grad_norm": 0.07982437312602997, "learning_rate": 0.01, "loss": 2.0423, "step": 28104 }, { "epoch": 2.8889916743755784, "grad_norm": 0.039345480501651764, "learning_rate": 0.01, "loss": 1.996, "step": 28107 }, { "epoch": 2.889300030835646, "grad_norm": 0.046677373349666595, "learning_rate": 0.01, "loss": 2.0204, "step": 28110 }, { "epoch": 2.8896083872957137, "grad_norm": 0.060302551835775375, "learning_rate": 0.01, "loss": 2.019, "step": 28113 }, { "epoch": 2.889916743755782, "grad_norm": 0.09422065317630768, "learning_rate": 0.01, "loss": 2.0223, "step": 28116 }, { "epoch": 2.8902251002158494, "grad_norm": 0.03713101148605347, "learning_rate": 0.01, "loss": 2.0008, "step": 28119 }, { "epoch": 2.890533456675917, "grad_norm": 0.062431566417217255, "learning_rate": 0.01, "loss": 2.0213, "step": 28122 }, { "epoch": 2.890841813135985, "grad_norm": 0.052230529487133026, "learning_rate": 0.01, "loss": 2.0233, "step": 28125 }, { "epoch": 2.8911501695960533, "grad_norm": 0.05514863133430481, "learning_rate": 0.01, "loss": 2.0233, "step": 28128 }, { "epoch": 2.891458526056121, "grad_norm": 0.04317644238471985, "learning_rate": 0.01, "loss": 2.0111, "step": 28131 }, { "epoch": 2.8917668825161886, "grad_norm": 0.05251277983188629, "learning_rate": 0.01, "loss": 2.0306, "step": 28134 }, { "epoch": 2.8920752389762567, "grad_norm": 0.10419400781393051, "learning_rate": 0.01, "loss": 2.0253, "step": 28137 }, { "epoch": 2.8923835954363244, "grad_norm": 0.08806058764457703, "learning_rate": 0.01, "loss": 2.0079, "step": 28140 }, { "epoch": 2.892691951896392, "grad_norm": 0.059875234961509705, "learning_rate": 0.01, "loss": 2.0066, "step": 28143 }, { "epoch": 2.89300030835646, "grad_norm": 0.036619991064071655, "learning_rate": 0.01, "loss": 2.0208, "step": 28146 }, { "epoch": 2.893308664816528, "grad_norm": 0.034454282373189926, "learning_rate": 0.01, "loss": 2.0068, "step": 28149 }, { "epoch": 2.8936170212765955, "grad_norm": 0.05209527164697647, "learning_rate": 0.01, "loss": 2.0252, "step": 28152 }, { "epoch": 2.8939253777366636, "grad_norm": 0.11549924314022064, "learning_rate": 0.01, "loss": 1.9974, "step": 28155 }, { "epoch": 2.8942337341967317, "grad_norm": 0.06887582689523697, "learning_rate": 0.01, "loss": 2.0, "step": 28158 }, { "epoch": 2.8945420906567993, "grad_norm": 0.046488743275403976, "learning_rate": 0.01, "loss": 1.9827, "step": 28161 }, { "epoch": 2.894850447116867, "grad_norm": 0.05086890980601311, "learning_rate": 0.01, "loss": 2.0073, "step": 28164 }, { "epoch": 2.895158803576935, "grad_norm": 0.03719864413142204, "learning_rate": 0.01, "loss": 2.0333, "step": 28167 }, { "epoch": 2.8954671600370028, "grad_norm": 0.059325605630874634, "learning_rate": 0.01, "loss": 2.0056, "step": 28170 }, { "epoch": 2.8957755164970704, "grad_norm": 0.088272824883461, "learning_rate": 0.01, "loss": 2.0114, "step": 28173 }, { "epoch": 2.8960838729571385, "grad_norm": 0.0874638557434082, "learning_rate": 0.01, "loss": 2.0158, "step": 28176 }, { "epoch": 2.896392229417206, "grad_norm": 0.059453610330820084, "learning_rate": 0.01, "loss": 1.9919, "step": 28179 }, { "epoch": 2.896700585877274, "grad_norm": 0.05221414193511009, "learning_rate": 0.01, "loss": 2.0056, "step": 28182 }, { "epoch": 2.897008942337342, "grad_norm": 0.03308747336268425, "learning_rate": 0.01, "loss": 2.0072, "step": 28185 }, { "epoch": 2.89731729879741, "grad_norm": 0.07506965100765228, "learning_rate": 0.01, "loss": 2.0022, "step": 28188 }, { "epoch": 2.8976256552574777, "grad_norm": 0.06189137324690819, "learning_rate": 0.01, "loss": 2.0046, "step": 28191 }, { "epoch": 2.8979340117175454, "grad_norm": 0.07855790853500366, "learning_rate": 0.01, "loss": 2.0307, "step": 28194 }, { "epoch": 2.8982423681776135, "grad_norm": 0.08880770951509476, "learning_rate": 0.01, "loss": 2.0242, "step": 28197 }, { "epoch": 2.898550724637681, "grad_norm": 0.05860179290175438, "learning_rate": 0.01, "loss": 2.0394, "step": 28200 }, { "epoch": 2.898859081097749, "grad_norm": 0.06037219241261482, "learning_rate": 0.01, "loss": 2.0131, "step": 28203 }, { "epoch": 2.899167437557817, "grad_norm": 0.16910460591316223, "learning_rate": 0.01, "loss": 2.0184, "step": 28206 }, { "epoch": 2.8994757940178846, "grad_norm": 0.05912697687745094, "learning_rate": 0.01, "loss": 2.0141, "step": 28209 }, { "epoch": 2.8997841504779522, "grad_norm": 0.04245199263095856, "learning_rate": 0.01, "loss": 2.0206, "step": 28212 }, { "epoch": 2.9000925069380203, "grad_norm": 0.03307129442691803, "learning_rate": 0.01, "loss": 2.0258, "step": 28215 }, { "epoch": 2.9004008633980884, "grad_norm": 0.03456464782357216, "learning_rate": 0.01, "loss": 2.0219, "step": 28218 }, { "epoch": 2.900709219858156, "grad_norm": 0.04525744169950485, "learning_rate": 0.01, "loss": 2.0417, "step": 28221 }, { "epoch": 2.9010175763182238, "grad_norm": 0.046930085867643356, "learning_rate": 0.01, "loss": 2.0296, "step": 28224 }, { "epoch": 2.901325932778292, "grad_norm": 0.07545941323041916, "learning_rate": 0.01, "loss": 2.028, "step": 28227 }, { "epoch": 2.9016342892383595, "grad_norm": 0.045901909470558167, "learning_rate": 0.01, "loss": 2.0143, "step": 28230 }, { "epoch": 2.901942645698427, "grad_norm": 0.06182632967829704, "learning_rate": 0.01, "loss": 2.0184, "step": 28233 }, { "epoch": 2.9022510021584953, "grad_norm": 0.1126553937792778, "learning_rate": 0.01, "loss": 2.0134, "step": 28236 }, { "epoch": 2.902559358618563, "grad_norm": 0.05637908726930618, "learning_rate": 0.01, "loss": 2.0023, "step": 28239 }, { "epoch": 2.902867715078631, "grad_norm": 0.03390496224164963, "learning_rate": 0.01, "loss": 2.0097, "step": 28242 }, { "epoch": 2.9031760715386987, "grad_norm": 0.1719658523797989, "learning_rate": 0.01, "loss": 2.0446, "step": 28245 }, { "epoch": 2.903484427998767, "grad_norm": 0.054201435297727585, "learning_rate": 0.01, "loss": 2.0101, "step": 28248 }, { "epoch": 2.9037927844588345, "grad_norm": 0.05339137092232704, "learning_rate": 0.01, "loss": 2.024, "step": 28251 }, { "epoch": 2.904101140918902, "grad_norm": 0.0490683875977993, "learning_rate": 0.01, "loss": 2.0112, "step": 28254 }, { "epoch": 2.9044094973789703, "grad_norm": 0.048101890832185745, "learning_rate": 0.01, "loss": 2.0244, "step": 28257 }, { "epoch": 2.904717853839038, "grad_norm": 0.034970205277204514, "learning_rate": 0.01, "loss": 2.0128, "step": 28260 }, { "epoch": 2.9050262102991056, "grad_norm": 0.05500125139951706, "learning_rate": 0.01, "loss": 1.9963, "step": 28263 }, { "epoch": 2.9053345667591737, "grad_norm": 0.0717696025967598, "learning_rate": 0.01, "loss": 2.0119, "step": 28266 }, { "epoch": 2.9056429232192413, "grad_norm": 0.08134905993938446, "learning_rate": 0.01, "loss": 2.0343, "step": 28269 }, { "epoch": 2.9059512796793094, "grad_norm": 0.13240204751491547, "learning_rate": 0.01, "loss": 2.0268, "step": 28272 }, { "epoch": 2.906259636139377, "grad_norm": 0.04060851037502289, "learning_rate": 0.01, "loss": 2.0163, "step": 28275 }, { "epoch": 2.906567992599445, "grad_norm": 0.04560050368309021, "learning_rate": 0.01, "loss": 2.0037, "step": 28278 }, { "epoch": 2.906876349059513, "grad_norm": 0.051935113966464996, "learning_rate": 0.01, "loss": 2.0025, "step": 28281 }, { "epoch": 2.9071847055195805, "grad_norm": 0.04502344876527786, "learning_rate": 0.01, "loss": 2.0212, "step": 28284 }, { "epoch": 2.9074930619796486, "grad_norm": 0.04561367630958557, "learning_rate": 0.01, "loss": 2.0369, "step": 28287 }, { "epoch": 2.9078014184397163, "grad_norm": 0.05686529353260994, "learning_rate": 0.01, "loss": 2.0217, "step": 28290 }, { "epoch": 2.908109774899784, "grad_norm": 0.06489894539117813, "learning_rate": 0.01, "loss": 2.0126, "step": 28293 }, { "epoch": 2.908418131359852, "grad_norm": 0.08427495509386063, "learning_rate": 0.01, "loss": 2.0, "step": 28296 }, { "epoch": 2.9087264878199197, "grad_norm": 0.08298421651124954, "learning_rate": 0.01, "loss": 2.0273, "step": 28299 }, { "epoch": 2.909034844279988, "grad_norm": 0.035743072628974915, "learning_rate": 0.01, "loss": 2.0167, "step": 28302 }, { "epoch": 2.9093432007400555, "grad_norm": 0.11177317798137665, "learning_rate": 0.01, "loss": 2.0037, "step": 28305 }, { "epoch": 2.9096515572001236, "grad_norm": 0.10962017625570297, "learning_rate": 0.01, "loss": 2.0155, "step": 28308 }, { "epoch": 2.9099599136601912, "grad_norm": 0.033734966069459915, "learning_rate": 0.01, "loss": 2.0131, "step": 28311 }, { "epoch": 2.910268270120259, "grad_norm": 0.050082772970199585, "learning_rate": 0.01, "loss": 2.0117, "step": 28314 }, { "epoch": 2.910576626580327, "grad_norm": 0.04048197343945503, "learning_rate": 0.01, "loss": 2.0085, "step": 28317 }, { "epoch": 2.9108849830403947, "grad_norm": 0.039657291024923325, "learning_rate": 0.01, "loss": 2.0233, "step": 28320 }, { "epoch": 2.9111933395004623, "grad_norm": 0.0639658197760582, "learning_rate": 0.01, "loss": 2.0001, "step": 28323 }, { "epoch": 2.9115016959605304, "grad_norm": 0.09306667000055313, "learning_rate": 0.01, "loss": 2.019, "step": 28326 }, { "epoch": 2.911810052420598, "grad_norm": 0.06027163937687874, "learning_rate": 0.01, "loss": 2.0341, "step": 28329 }, { "epoch": 2.912118408880666, "grad_norm": 0.08570882678031921, "learning_rate": 0.01, "loss": 2.0192, "step": 28332 }, { "epoch": 2.912426765340734, "grad_norm": 0.07936012744903564, "learning_rate": 0.01, "loss": 2.0165, "step": 28335 }, { "epoch": 2.912735121800802, "grad_norm": 0.06479290872812271, "learning_rate": 0.01, "loss": 2.027, "step": 28338 }, { "epoch": 2.9130434782608696, "grad_norm": 0.06973425298929214, "learning_rate": 0.01, "loss": 2.0037, "step": 28341 }, { "epoch": 2.9133518347209373, "grad_norm": 0.05557083338499069, "learning_rate": 0.01, "loss": 2.0323, "step": 28344 }, { "epoch": 2.9136601911810054, "grad_norm": 0.04901493340730667, "learning_rate": 0.01, "loss": 2.0088, "step": 28347 }, { "epoch": 2.913968547641073, "grad_norm": 0.1036488264799118, "learning_rate": 0.01, "loss": 2.0486, "step": 28350 }, { "epoch": 2.9142769041011407, "grad_norm": 0.04318247362971306, "learning_rate": 0.01, "loss": 2.0195, "step": 28353 }, { "epoch": 2.914585260561209, "grad_norm": 0.0404491052031517, "learning_rate": 0.01, "loss": 2.0237, "step": 28356 }, { "epoch": 2.9148936170212765, "grad_norm": 0.03727111592888832, "learning_rate": 0.01, "loss": 2.0153, "step": 28359 }, { "epoch": 2.9152019734813446, "grad_norm": 0.04736052453517914, "learning_rate": 0.01, "loss": 2.0487, "step": 28362 }, { "epoch": 2.9155103299414122, "grad_norm": 0.13712109625339508, "learning_rate": 0.01, "loss": 2.0434, "step": 28365 }, { "epoch": 2.9158186864014803, "grad_norm": 0.046428218483924866, "learning_rate": 0.01, "loss": 2.0177, "step": 28368 }, { "epoch": 2.916127042861548, "grad_norm": 0.036403872072696686, "learning_rate": 0.01, "loss": 2.0144, "step": 28371 }, { "epoch": 2.9164353993216157, "grad_norm": 0.04359270632266998, "learning_rate": 0.01, "loss": 2.0386, "step": 28374 }, { "epoch": 2.9167437557816838, "grad_norm": 0.06155150756239891, "learning_rate": 0.01, "loss": 2.0014, "step": 28377 }, { "epoch": 2.9170521122417514, "grad_norm": 0.035659730434417725, "learning_rate": 0.01, "loss": 2.012, "step": 28380 }, { "epoch": 2.917360468701819, "grad_norm": 0.09453250467777252, "learning_rate": 0.01, "loss": 2.0006, "step": 28383 }, { "epoch": 2.917668825161887, "grad_norm": 0.062337253242731094, "learning_rate": 0.01, "loss": 2.0131, "step": 28386 }, { "epoch": 2.917977181621955, "grad_norm": 0.05488646402955055, "learning_rate": 0.01, "loss": 2.0212, "step": 28389 }, { "epoch": 2.918285538082023, "grad_norm": 0.07092556357383728, "learning_rate": 0.01, "loss": 2.0123, "step": 28392 }, { "epoch": 2.9185938945420906, "grad_norm": 0.10335639864206314, "learning_rate": 0.01, "loss": 2.0336, "step": 28395 }, { "epoch": 2.9189022510021587, "grad_norm": 0.1187339797616005, "learning_rate": 0.01, "loss": 2.0218, "step": 28398 }, { "epoch": 2.9192106074622264, "grad_norm": 0.09254027903079987, "learning_rate": 0.01, "loss": 2.0077, "step": 28401 }, { "epoch": 2.919518963922294, "grad_norm": 0.04120078682899475, "learning_rate": 0.01, "loss": 2.032, "step": 28404 }, { "epoch": 2.919827320382362, "grad_norm": 0.04366682097315788, "learning_rate": 0.01, "loss": 2.0128, "step": 28407 }, { "epoch": 2.92013567684243, "grad_norm": 0.03700088709592819, "learning_rate": 0.01, "loss": 2.0027, "step": 28410 }, { "epoch": 2.9204440333024975, "grad_norm": 0.03409232571721077, "learning_rate": 0.01, "loss": 2.0068, "step": 28413 }, { "epoch": 2.9207523897625656, "grad_norm": 0.04503092169761658, "learning_rate": 0.01, "loss": 2.0042, "step": 28416 }, { "epoch": 2.9210607462226332, "grad_norm": 0.04988139122724533, "learning_rate": 0.01, "loss": 2.0418, "step": 28419 }, { "epoch": 2.9213691026827013, "grad_norm": 0.05121064558625221, "learning_rate": 0.01, "loss": 2.0241, "step": 28422 }, { "epoch": 2.921677459142769, "grad_norm": 0.03916258364915848, "learning_rate": 0.01, "loss": 2.0461, "step": 28425 }, { "epoch": 2.921985815602837, "grad_norm": 0.1349031776189804, "learning_rate": 0.01, "loss": 2.0186, "step": 28428 }, { "epoch": 2.9222941720629048, "grad_norm": 0.12042077630758286, "learning_rate": 0.01, "loss": 2.0378, "step": 28431 }, { "epoch": 2.9226025285229724, "grad_norm": 0.08157286792993546, "learning_rate": 0.01, "loss": 1.9957, "step": 28434 }, { "epoch": 2.9229108849830405, "grad_norm": 0.07750184834003448, "learning_rate": 0.01, "loss": 2.0193, "step": 28437 }, { "epoch": 2.923219241443108, "grad_norm": 0.07270103693008423, "learning_rate": 0.01, "loss": 2.015, "step": 28440 }, { "epoch": 2.923527597903176, "grad_norm": 0.05140276625752449, "learning_rate": 0.01, "loss": 1.9987, "step": 28443 }, { "epoch": 2.923835954363244, "grad_norm": 0.07426592707633972, "learning_rate": 0.01, "loss": 2.031, "step": 28446 }, { "epoch": 2.9241443108233116, "grad_norm": 0.05136018618941307, "learning_rate": 0.01, "loss": 2.0166, "step": 28449 }, { "epoch": 2.9244526672833797, "grad_norm": 0.03354816511273384, "learning_rate": 0.01, "loss": 1.9985, "step": 28452 }, { "epoch": 2.9247610237434474, "grad_norm": 0.03671132028102875, "learning_rate": 0.01, "loss": 1.9954, "step": 28455 }, { "epoch": 2.9250693802035155, "grad_norm": 0.05378049612045288, "learning_rate": 0.01, "loss": 2.0276, "step": 28458 }, { "epoch": 2.925377736663583, "grad_norm": 0.04772641509771347, "learning_rate": 0.01, "loss": 2.0113, "step": 28461 }, { "epoch": 2.925686093123651, "grad_norm": 0.05942932888865471, "learning_rate": 0.01, "loss": 2.0223, "step": 28464 }, { "epoch": 2.925994449583719, "grad_norm": 0.059020113199949265, "learning_rate": 0.01, "loss": 2.0089, "step": 28467 }, { "epoch": 2.9263028060437866, "grad_norm": 0.19283726811408997, "learning_rate": 0.01, "loss": 2.0302, "step": 28470 }, { "epoch": 2.9266111625038542, "grad_norm": 0.09231256693601608, "learning_rate": 0.01, "loss": 1.9915, "step": 28473 }, { "epoch": 2.9269195189639223, "grad_norm": 0.058674681931734085, "learning_rate": 0.01, "loss": 2.0095, "step": 28476 }, { "epoch": 2.92722787542399, "grad_norm": 0.05014891177415848, "learning_rate": 0.01, "loss": 2.0197, "step": 28479 }, { "epoch": 2.927536231884058, "grad_norm": 0.060597196221351624, "learning_rate": 0.01, "loss": 1.9932, "step": 28482 }, { "epoch": 2.9278445883441258, "grad_norm": 0.08379890769720078, "learning_rate": 0.01, "loss": 2.027, "step": 28485 }, { "epoch": 2.928152944804194, "grad_norm": 0.04832527041435242, "learning_rate": 0.01, "loss": 1.9973, "step": 28488 }, { "epoch": 2.9284613012642615, "grad_norm": 0.06709366291761398, "learning_rate": 0.01, "loss": 2.0287, "step": 28491 }, { "epoch": 2.928769657724329, "grad_norm": 0.044588811695575714, "learning_rate": 0.01, "loss": 2.0253, "step": 28494 }, { "epoch": 2.9290780141843973, "grad_norm": 0.05735667049884796, "learning_rate": 0.01, "loss": 1.9972, "step": 28497 }, { "epoch": 2.929386370644465, "grad_norm": 0.041256386786699295, "learning_rate": 0.01, "loss": 2.0109, "step": 28500 }, { "epoch": 2.9296947271045326, "grad_norm": 0.03460060432553291, "learning_rate": 0.01, "loss": 2.0028, "step": 28503 }, { "epoch": 2.9300030835646007, "grad_norm": 0.05080440267920494, "learning_rate": 0.01, "loss": 2.0186, "step": 28506 }, { "epoch": 2.9303114400246684, "grad_norm": 0.1281091421842575, "learning_rate": 0.01, "loss": 2.0219, "step": 28509 }, { "epoch": 2.9306197964847365, "grad_norm": 0.05667036399245262, "learning_rate": 0.01, "loss": 2.0034, "step": 28512 }, { "epoch": 2.930928152944804, "grad_norm": 0.10026687383651733, "learning_rate": 0.01, "loss": 2.0082, "step": 28515 }, { "epoch": 2.9312365094048722, "grad_norm": 0.04924483224749565, "learning_rate": 0.01, "loss": 2.0034, "step": 28518 }, { "epoch": 2.93154486586494, "grad_norm": 0.04905860871076584, "learning_rate": 0.01, "loss": 2.0125, "step": 28521 }, { "epoch": 2.9318532223250076, "grad_norm": 0.0525185652077198, "learning_rate": 0.01, "loss": 2.0193, "step": 28524 }, { "epoch": 2.9321615787850757, "grad_norm": 0.04196888953447342, "learning_rate": 0.01, "loss": 2.0167, "step": 28527 }, { "epoch": 2.9324699352451433, "grad_norm": 0.04660086706280708, "learning_rate": 0.01, "loss": 1.996, "step": 28530 }, { "epoch": 2.932778291705211, "grad_norm": 0.05741078406572342, "learning_rate": 0.01, "loss": 2.0093, "step": 28533 }, { "epoch": 2.933086648165279, "grad_norm": 0.052953120321035385, "learning_rate": 0.01, "loss": 2.0111, "step": 28536 }, { "epoch": 2.9333950046253467, "grad_norm": 0.04325610026717186, "learning_rate": 0.01, "loss": 2.0252, "step": 28539 }, { "epoch": 2.933703361085415, "grad_norm": 0.04387751594185829, "learning_rate": 0.01, "loss": 2.0052, "step": 28542 }, { "epoch": 2.9340117175454825, "grad_norm": 0.03476174548268318, "learning_rate": 0.01, "loss": 2.0143, "step": 28545 }, { "epoch": 2.9343200740055506, "grad_norm": 0.03839149698615074, "learning_rate": 0.01, "loss": 2.0289, "step": 28548 }, { "epoch": 2.9346284304656183, "grad_norm": 0.0841054692864418, "learning_rate": 0.01, "loss": 1.9642, "step": 28551 }, { "epoch": 2.934936786925686, "grad_norm": 0.10850472748279572, "learning_rate": 0.01, "loss": 2.0345, "step": 28554 }, { "epoch": 2.935245143385754, "grad_norm": 0.12255658954381943, "learning_rate": 0.01, "loss": 1.9961, "step": 28557 }, { "epoch": 2.9355534998458217, "grad_norm": 0.05664276331663132, "learning_rate": 0.01, "loss": 2.0202, "step": 28560 }, { "epoch": 2.9358618563058894, "grad_norm": 0.11724577099084854, "learning_rate": 0.01, "loss": 2.027, "step": 28563 }, { "epoch": 2.9361702127659575, "grad_norm": 0.06823346763849258, "learning_rate": 0.01, "loss": 2.0104, "step": 28566 }, { "epoch": 2.936478569226025, "grad_norm": 0.035576723515987396, "learning_rate": 0.01, "loss": 2.0302, "step": 28569 }, { "epoch": 2.9367869256860932, "grad_norm": 0.03186320886015892, "learning_rate": 0.01, "loss": 2.0181, "step": 28572 }, { "epoch": 2.937095282146161, "grad_norm": 0.03750181198120117, "learning_rate": 0.01, "loss": 2.0096, "step": 28575 }, { "epoch": 2.937403638606229, "grad_norm": 0.05272996798157692, "learning_rate": 0.01, "loss": 2.0021, "step": 28578 }, { "epoch": 2.9377119950662967, "grad_norm": 0.033289846032857895, "learning_rate": 0.01, "loss": 2.0055, "step": 28581 }, { "epoch": 2.9380203515263643, "grad_norm": 0.11037155240774155, "learning_rate": 0.01, "loss": 2.0271, "step": 28584 }, { "epoch": 2.9383287079864324, "grad_norm": 0.10620691627264023, "learning_rate": 0.01, "loss": 2.0012, "step": 28587 }, { "epoch": 2.9386370644465, "grad_norm": 0.052820686250925064, "learning_rate": 0.01, "loss": 2.009, "step": 28590 }, { "epoch": 2.9389454209065677, "grad_norm": 0.057419613003730774, "learning_rate": 0.01, "loss": 2.0122, "step": 28593 }, { "epoch": 2.939253777366636, "grad_norm": 0.04066954553127289, "learning_rate": 0.01, "loss": 2.0043, "step": 28596 }, { "epoch": 2.9395621338267035, "grad_norm": 0.04638439416885376, "learning_rate": 0.01, "loss": 2.0244, "step": 28599 }, { "epoch": 2.9398704902867716, "grad_norm": 0.11629427969455719, "learning_rate": 0.01, "loss": 1.9867, "step": 28602 }, { "epoch": 2.9401788467468393, "grad_norm": 0.05921720713376999, "learning_rate": 0.01, "loss": 2.0155, "step": 28605 }, { "epoch": 2.9404872032069074, "grad_norm": 0.05282594636082649, "learning_rate": 0.01, "loss": 2.0138, "step": 28608 }, { "epoch": 2.940795559666975, "grad_norm": 0.04680659994482994, "learning_rate": 0.01, "loss": 2.011, "step": 28611 }, { "epoch": 2.9411039161270427, "grad_norm": 0.11889224499464035, "learning_rate": 0.01, "loss": 2.0242, "step": 28614 }, { "epoch": 2.941412272587111, "grad_norm": 0.03500881418585777, "learning_rate": 0.01, "loss": 1.9972, "step": 28617 }, { "epoch": 2.9417206290471785, "grad_norm": 0.08639416098594666, "learning_rate": 0.01, "loss": 2.0108, "step": 28620 }, { "epoch": 2.942028985507246, "grad_norm": 0.03335639461874962, "learning_rate": 0.01, "loss": 2.0262, "step": 28623 }, { "epoch": 2.9423373419673142, "grad_norm": 0.045041028410196304, "learning_rate": 0.01, "loss": 2.0102, "step": 28626 }, { "epoch": 2.9426456984273823, "grad_norm": 0.047020211815834045, "learning_rate": 0.01, "loss": 2.0278, "step": 28629 }, { "epoch": 2.94295405488745, "grad_norm": 0.059858404099941254, "learning_rate": 0.01, "loss": 2.0038, "step": 28632 }, { "epoch": 2.9432624113475176, "grad_norm": 0.053345970809459686, "learning_rate": 0.01, "loss": 2.0135, "step": 28635 }, { "epoch": 2.9435707678075858, "grad_norm": 0.05528027564287186, "learning_rate": 0.01, "loss": 2.0044, "step": 28638 }, { "epoch": 2.9438791242676534, "grad_norm": 0.09895586222410202, "learning_rate": 0.01, "loss": 2.0117, "step": 28641 }, { "epoch": 2.944187480727721, "grad_norm": 0.11827319115400314, "learning_rate": 0.01, "loss": 1.9899, "step": 28644 }, { "epoch": 2.944495837187789, "grad_norm": 0.05013230815529823, "learning_rate": 0.01, "loss": 2.0146, "step": 28647 }, { "epoch": 2.944804193647857, "grad_norm": 0.0422658771276474, "learning_rate": 0.01, "loss": 2.0064, "step": 28650 }, { "epoch": 2.9451125501079245, "grad_norm": 0.04022778570652008, "learning_rate": 0.01, "loss": 1.9972, "step": 28653 }, { "epoch": 2.9454209065679926, "grad_norm": 0.043504439294338226, "learning_rate": 0.01, "loss": 2.0138, "step": 28656 }, { "epoch": 2.9457292630280607, "grad_norm": 0.06587618589401245, "learning_rate": 0.01, "loss": 2.0084, "step": 28659 }, { "epoch": 2.9460376194881284, "grad_norm": 0.10226722061634064, "learning_rate": 0.01, "loss": 2.0073, "step": 28662 }, { "epoch": 2.946345975948196, "grad_norm": 0.047040197998285294, "learning_rate": 0.01, "loss": 2.0082, "step": 28665 }, { "epoch": 2.946654332408264, "grad_norm": 0.0919933021068573, "learning_rate": 0.01, "loss": 2.0079, "step": 28668 }, { "epoch": 2.946962688868332, "grad_norm": 0.05978929251432419, "learning_rate": 0.01, "loss": 2.0271, "step": 28671 }, { "epoch": 2.9472710453283995, "grad_norm": 0.05814214050769806, "learning_rate": 0.01, "loss": 2.0224, "step": 28674 }, { "epoch": 2.9475794017884676, "grad_norm": 0.09039480984210968, "learning_rate": 0.01, "loss": 2.0026, "step": 28677 }, { "epoch": 2.947887758248535, "grad_norm": 0.1607222557067871, "learning_rate": 0.01, "loss": 2.0201, "step": 28680 }, { "epoch": 2.948196114708603, "grad_norm": 0.04198214039206505, "learning_rate": 0.01, "loss": 2.0157, "step": 28683 }, { "epoch": 2.948504471168671, "grad_norm": 0.06442588567733765, "learning_rate": 0.01, "loss": 1.9874, "step": 28686 }, { "epoch": 2.948812827628739, "grad_norm": 0.11082901805639267, "learning_rate": 0.01, "loss": 2.0165, "step": 28689 }, { "epoch": 2.9491211840888067, "grad_norm": 0.03669416159391403, "learning_rate": 0.01, "loss": 2.0311, "step": 28692 }, { "epoch": 2.9494295405488744, "grad_norm": 0.06837104260921478, "learning_rate": 0.01, "loss": 2.0052, "step": 28695 }, { "epoch": 2.9497378970089425, "grad_norm": 0.0466892383992672, "learning_rate": 0.01, "loss": 1.9896, "step": 28698 }, { "epoch": 2.95004625346901, "grad_norm": 0.06962305307388306, "learning_rate": 0.01, "loss": 2.023, "step": 28701 }, { "epoch": 2.950354609929078, "grad_norm": 0.06136500835418701, "learning_rate": 0.01, "loss": 2.0189, "step": 28704 }, { "epoch": 2.950662966389146, "grad_norm": 0.050067611038684845, "learning_rate": 0.01, "loss": 2.0258, "step": 28707 }, { "epoch": 2.9509713228492136, "grad_norm": 0.0527566559612751, "learning_rate": 0.01, "loss": 2.0157, "step": 28710 }, { "epoch": 2.9512796793092813, "grad_norm": 0.12275776267051697, "learning_rate": 0.01, "loss": 2.0061, "step": 28713 }, { "epoch": 2.9515880357693494, "grad_norm": 0.052756138145923615, "learning_rate": 0.01, "loss": 2.0235, "step": 28716 }, { "epoch": 2.9518963922294175, "grad_norm": 0.038834694772958755, "learning_rate": 0.01, "loss": 1.9956, "step": 28719 }, { "epoch": 2.952204748689485, "grad_norm": 0.07736565917730331, "learning_rate": 0.01, "loss": 2.0185, "step": 28722 }, { "epoch": 2.952513105149553, "grad_norm": 0.10177972167730331, "learning_rate": 0.01, "loss": 2.017, "step": 28725 }, { "epoch": 2.952821461609621, "grad_norm": 0.082985520362854, "learning_rate": 0.01, "loss": 1.9967, "step": 28728 }, { "epoch": 2.9531298180696886, "grad_norm": 0.05385474115610123, "learning_rate": 0.01, "loss": 2.0239, "step": 28731 }, { "epoch": 2.953438174529756, "grad_norm": 0.0803777202963829, "learning_rate": 0.01, "loss": 2.0155, "step": 28734 }, { "epoch": 2.9537465309898243, "grad_norm": 0.06875913590192795, "learning_rate": 0.01, "loss": 2.0237, "step": 28737 }, { "epoch": 2.954054887449892, "grad_norm": 0.09149770438671112, "learning_rate": 0.01, "loss": 2.0153, "step": 28740 }, { "epoch": 2.95436324390996, "grad_norm": 0.1182807981967926, "learning_rate": 0.01, "loss": 2.0117, "step": 28743 }, { "epoch": 2.9546716003700277, "grad_norm": 0.044717706739902496, "learning_rate": 0.01, "loss": 2.0233, "step": 28746 }, { "epoch": 2.954979956830096, "grad_norm": 0.04887622967362404, "learning_rate": 0.01, "loss": 2.0232, "step": 28749 }, { "epoch": 2.9552883132901635, "grad_norm": 0.041407834738492966, "learning_rate": 0.01, "loss": 1.9945, "step": 28752 }, { "epoch": 2.955596669750231, "grad_norm": 0.05831639841198921, "learning_rate": 0.01, "loss": 2.0485, "step": 28755 }, { "epoch": 2.9559050262102993, "grad_norm": 0.08279699832201004, "learning_rate": 0.01, "loss": 2.0275, "step": 28758 }, { "epoch": 2.956213382670367, "grad_norm": 0.08988698571920395, "learning_rate": 0.01, "loss": 1.9966, "step": 28761 }, { "epoch": 2.9565217391304346, "grad_norm": 0.0645131915807724, "learning_rate": 0.01, "loss": 2.0001, "step": 28764 }, { "epoch": 2.9568300955905027, "grad_norm": 0.08262491226196289, "learning_rate": 0.01, "loss": 1.999, "step": 28767 }, { "epoch": 2.9571384520505704, "grad_norm": 0.03773394227027893, "learning_rate": 0.01, "loss": 2.0108, "step": 28770 }, { "epoch": 2.9574468085106385, "grad_norm": 0.08012068271636963, "learning_rate": 0.01, "loss": 2.0032, "step": 28773 }, { "epoch": 2.957755164970706, "grad_norm": 0.04637681320309639, "learning_rate": 0.01, "loss": 2.0324, "step": 28776 }, { "epoch": 2.9580635214307742, "grad_norm": 0.031988177448511124, "learning_rate": 0.01, "loss": 1.9966, "step": 28779 }, { "epoch": 2.958371877890842, "grad_norm": 0.08325552940368652, "learning_rate": 0.01, "loss": 1.9993, "step": 28782 }, { "epoch": 2.9586802343509095, "grad_norm": 0.06131797283887863, "learning_rate": 0.01, "loss": 1.9963, "step": 28785 }, { "epoch": 2.9589885908109776, "grad_norm": 0.04146185144782066, "learning_rate": 0.01, "loss": 2.0353, "step": 28788 }, { "epoch": 2.9592969472710453, "grad_norm": 0.04004296660423279, "learning_rate": 0.01, "loss": 2.0113, "step": 28791 }, { "epoch": 2.959605303731113, "grad_norm": 0.07771418243646622, "learning_rate": 0.01, "loss": 2.025, "step": 28794 }, { "epoch": 2.959913660191181, "grad_norm": 0.048570699989795685, "learning_rate": 0.01, "loss": 2.0013, "step": 28797 }, { "epoch": 2.9602220166512487, "grad_norm": 0.0664374902844429, "learning_rate": 0.01, "loss": 2.0279, "step": 28800 }, { "epoch": 2.960530373111317, "grad_norm": 0.09744110703468323, "learning_rate": 0.01, "loss": 2.0058, "step": 28803 }, { "epoch": 2.9608387295713845, "grad_norm": 0.03430997580289841, "learning_rate": 0.01, "loss": 1.9996, "step": 28806 }, { "epoch": 2.9611470860314526, "grad_norm": 0.036999545991420746, "learning_rate": 0.01, "loss": 2.0146, "step": 28809 }, { "epoch": 2.9614554424915203, "grad_norm": 0.05290836840867996, "learning_rate": 0.01, "loss": 2.011, "step": 28812 }, { "epoch": 2.961763798951588, "grad_norm": 0.061555683612823486, "learning_rate": 0.01, "loss": 2.0145, "step": 28815 }, { "epoch": 2.962072155411656, "grad_norm": 0.06276807934045792, "learning_rate": 0.01, "loss": 2.0218, "step": 28818 }, { "epoch": 2.9623805118717237, "grad_norm": 0.05685890465974808, "learning_rate": 0.01, "loss": 1.9963, "step": 28821 }, { "epoch": 2.9626888683317913, "grad_norm": 0.09975235909223557, "learning_rate": 0.01, "loss": 2.0082, "step": 28824 }, { "epoch": 2.9629972247918595, "grad_norm": 0.04205232113599777, "learning_rate": 0.01, "loss": 2.0383, "step": 28827 }, { "epoch": 2.963305581251927, "grad_norm": 0.07952512800693512, "learning_rate": 0.01, "loss": 2.0059, "step": 28830 }, { "epoch": 2.963613937711995, "grad_norm": 0.04118579626083374, "learning_rate": 0.01, "loss": 1.9727, "step": 28833 }, { "epoch": 2.963922294172063, "grad_norm": 0.10875571519136429, "learning_rate": 0.01, "loss": 2.0153, "step": 28836 }, { "epoch": 2.964230650632131, "grad_norm": 0.16663865745067596, "learning_rate": 0.01, "loss": 2.001, "step": 28839 }, { "epoch": 2.9645390070921986, "grad_norm": 0.08370403200387955, "learning_rate": 0.01, "loss": 2.0158, "step": 28842 }, { "epoch": 2.9648473635522663, "grad_norm": 0.07209211587905884, "learning_rate": 0.01, "loss": 2.0271, "step": 28845 }, { "epoch": 2.9651557200123344, "grad_norm": 0.05848775431513786, "learning_rate": 0.01, "loss": 2.013, "step": 28848 }, { "epoch": 2.965464076472402, "grad_norm": 0.02791527472436428, "learning_rate": 0.01, "loss": 2.0331, "step": 28851 }, { "epoch": 2.9657724329324697, "grad_norm": 0.03520442545413971, "learning_rate": 0.01, "loss": 2.0346, "step": 28854 }, { "epoch": 2.966080789392538, "grad_norm": 0.03414986655116081, "learning_rate": 0.01, "loss": 2.0067, "step": 28857 }, { "epoch": 2.9663891458526055, "grad_norm": 0.0448157899081707, "learning_rate": 0.01, "loss": 2.0137, "step": 28860 }, { "epoch": 2.9666975023126736, "grad_norm": 0.09282497316598892, "learning_rate": 0.01, "loss": 2.0284, "step": 28863 }, { "epoch": 2.9670058587727413, "grad_norm": 0.05999337136745453, "learning_rate": 0.01, "loss": 2.0256, "step": 28866 }, { "epoch": 2.9673142152328094, "grad_norm": 0.038877278566360474, "learning_rate": 0.01, "loss": 1.9988, "step": 28869 }, { "epoch": 2.967622571692877, "grad_norm": 0.04760310798883438, "learning_rate": 0.01, "loss": 2.016, "step": 28872 }, { "epoch": 2.9679309281529447, "grad_norm": 0.049317944794893265, "learning_rate": 0.01, "loss": 1.9993, "step": 28875 }, { "epoch": 2.968239284613013, "grad_norm": 0.038830939680337906, "learning_rate": 0.01, "loss": 1.9969, "step": 28878 }, { "epoch": 2.9685476410730804, "grad_norm": 0.18675923347473145, "learning_rate": 0.01, "loss": 2.0217, "step": 28881 }, { "epoch": 2.968855997533148, "grad_norm": 0.04269779101014137, "learning_rate": 0.01, "loss": 2.0163, "step": 28884 }, { "epoch": 2.969164353993216, "grad_norm": 0.04740308225154877, "learning_rate": 0.01, "loss": 2.0184, "step": 28887 }, { "epoch": 2.969472710453284, "grad_norm": 0.058595623821020126, "learning_rate": 0.01, "loss": 2.0369, "step": 28890 }, { "epoch": 2.969781066913352, "grad_norm": 0.06136814132332802, "learning_rate": 0.01, "loss": 2.0229, "step": 28893 }, { "epoch": 2.9700894233734196, "grad_norm": 0.059679534286260605, "learning_rate": 0.01, "loss": 2.0151, "step": 28896 }, { "epoch": 2.9703977798334877, "grad_norm": 0.044182952493429184, "learning_rate": 0.01, "loss": 1.9977, "step": 28899 }, { "epoch": 2.9707061362935554, "grad_norm": 0.04152587056159973, "learning_rate": 0.01, "loss": 2.01, "step": 28902 }, { "epoch": 2.971014492753623, "grad_norm": 0.0424608550965786, "learning_rate": 0.01, "loss": 2.0228, "step": 28905 }, { "epoch": 2.971322849213691, "grad_norm": 0.040096018463373184, "learning_rate": 0.01, "loss": 1.9976, "step": 28908 }, { "epoch": 2.971631205673759, "grad_norm": 0.03288499265909195, "learning_rate": 0.01, "loss": 2.0416, "step": 28911 }, { "epoch": 2.9719395621338265, "grad_norm": 0.05150043964385986, "learning_rate": 0.01, "loss": 2.0, "step": 28914 }, { "epoch": 2.9722479185938946, "grad_norm": 0.05535360425710678, "learning_rate": 0.01, "loss": 2.0215, "step": 28917 }, { "epoch": 2.9725562750539622, "grad_norm": 0.09103085100650787, "learning_rate": 0.01, "loss": 2.0227, "step": 28920 }, { "epoch": 2.9728646315140304, "grad_norm": 0.060518983751535416, "learning_rate": 0.01, "loss": 1.9942, "step": 28923 }, { "epoch": 2.973172987974098, "grad_norm": 0.15921367704868317, "learning_rate": 0.01, "loss": 2.0203, "step": 28926 }, { "epoch": 2.973481344434166, "grad_norm": 0.04451125115156174, "learning_rate": 0.01, "loss": 2.0057, "step": 28929 }, { "epoch": 2.973789700894234, "grad_norm": 0.06099553778767586, "learning_rate": 0.01, "loss": 2.0317, "step": 28932 }, { "epoch": 2.9740980573543014, "grad_norm": 0.07864733040332794, "learning_rate": 0.01, "loss": 2.0392, "step": 28935 }, { "epoch": 2.9744064138143695, "grad_norm": 0.04279434680938721, "learning_rate": 0.01, "loss": 2.0189, "step": 28938 }, { "epoch": 2.974714770274437, "grad_norm": 0.06299113482236862, "learning_rate": 0.01, "loss": 2.0379, "step": 28941 }, { "epoch": 2.975023126734505, "grad_norm": 0.04803795740008354, "learning_rate": 0.01, "loss": 2.0161, "step": 28944 }, { "epoch": 2.975331483194573, "grad_norm": 0.04646730050444603, "learning_rate": 0.01, "loss": 2.026, "step": 28947 }, { "epoch": 2.9756398396546406, "grad_norm": 0.05352159962058067, "learning_rate": 0.01, "loss": 2.0114, "step": 28950 }, { "epoch": 2.9759481961147087, "grad_norm": 0.07281997799873352, "learning_rate": 0.01, "loss": 2.0328, "step": 28953 }, { "epoch": 2.9762565525747764, "grad_norm": 0.050049133598804474, "learning_rate": 0.01, "loss": 1.9943, "step": 28956 }, { "epoch": 2.9765649090348445, "grad_norm": 0.039795007556676865, "learning_rate": 0.01, "loss": 2.0128, "step": 28959 }, { "epoch": 2.976873265494912, "grad_norm": 0.09085065871477127, "learning_rate": 0.01, "loss": 2.005, "step": 28962 }, { "epoch": 2.97718162195498, "grad_norm": 0.1592961698770523, "learning_rate": 0.01, "loss": 2.0373, "step": 28965 }, { "epoch": 2.977489978415048, "grad_norm": 0.10092988610267639, "learning_rate": 0.01, "loss": 2.0009, "step": 28968 }, { "epoch": 2.9777983348751156, "grad_norm": 0.046051934361457825, "learning_rate": 0.01, "loss": 1.9942, "step": 28971 }, { "epoch": 2.9781066913351832, "grad_norm": 0.04267173632979393, "learning_rate": 0.01, "loss": 1.9997, "step": 28974 }, { "epoch": 2.9784150477952513, "grad_norm": 0.032134927809238434, "learning_rate": 0.01, "loss": 1.9694, "step": 28977 }, { "epoch": 2.978723404255319, "grad_norm": 0.055023569613695145, "learning_rate": 0.01, "loss": 2.0202, "step": 28980 }, { "epoch": 2.979031760715387, "grad_norm": 0.07177849858999252, "learning_rate": 0.01, "loss": 2.0287, "step": 28983 }, { "epoch": 2.9793401171754548, "grad_norm": 0.05890415981411934, "learning_rate": 0.01, "loss": 2.0226, "step": 28986 }, { "epoch": 2.979648473635523, "grad_norm": 0.04443821310997009, "learning_rate": 0.01, "loss": 2.042, "step": 28989 }, { "epoch": 2.9799568300955905, "grad_norm": 0.04153164103627205, "learning_rate": 0.01, "loss": 2.0111, "step": 28992 }, { "epoch": 2.980265186555658, "grad_norm": 0.07844668626785278, "learning_rate": 0.01, "loss": 2.0281, "step": 28995 }, { "epoch": 2.9805735430157263, "grad_norm": 0.09027374535799026, "learning_rate": 0.01, "loss": 2.0325, "step": 28998 }, { "epoch": 2.980881899475794, "grad_norm": 0.04559837654232979, "learning_rate": 0.01, "loss": 1.9926, "step": 29001 }, { "epoch": 2.9811902559358616, "grad_norm": 0.03771069645881653, "learning_rate": 0.01, "loss": 1.9966, "step": 29004 }, { "epoch": 2.9814986123959297, "grad_norm": 0.09008541703224182, "learning_rate": 0.01, "loss": 2.0246, "step": 29007 }, { "epoch": 2.9818069688559974, "grad_norm": 0.060089852660894394, "learning_rate": 0.01, "loss": 2.0127, "step": 29010 }, { "epoch": 2.9821153253160655, "grad_norm": 0.11291385442018509, "learning_rate": 0.01, "loss": 2.0397, "step": 29013 }, { "epoch": 2.982423681776133, "grad_norm": 0.09720003604888916, "learning_rate": 0.01, "loss": 2.0277, "step": 29016 }, { "epoch": 2.9827320382362013, "grad_norm": 0.060459867119789124, "learning_rate": 0.01, "loss": 1.9914, "step": 29019 }, { "epoch": 2.983040394696269, "grad_norm": 0.07619535177946091, "learning_rate": 0.01, "loss": 1.9964, "step": 29022 }, { "epoch": 2.9833487511563366, "grad_norm": 0.05402089282870293, "learning_rate": 0.01, "loss": 1.9867, "step": 29025 }, { "epoch": 2.9836571076164047, "grad_norm": 0.05162883922457695, "learning_rate": 0.01, "loss": 2.0135, "step": 29028 }, { "epoch": 2.9839654640764723, "grad_norm": 0.037369467318058014, "learning_rate": 0.01, "loss": 2.0232, "step": 29031 }, { "epoch": 2.98427382053654, "grad_norm": 0.05483279377222061, "learning_rate": 0.01, "loss": 2.0208, "step": 29034 }, { "epoch": 2.984582176996608, "grad_norm": 0.050847407430410385, "learning_rate": 0.01, "loss": 2.0043, "step": 29037 }, { "epoch": 2.9848905334566758, "grad_norm": 0.07075628638267517, "learning_rate": 0.01, "loss": 2.0408, "step": 29040 }, { "epoch": 2.985198889916744, "grad_norm": 0.05580511689186096, "learning_rate": 0.01, "loss": 2.0142, "step": 29043 }, { "epoch": 2.9855072463768115, "grad_norm": 0.043689336627721786, "learning_rate": 0.01, "loss": 2.0118, "step": 29046 }, { "epoch": 2.9858156028368796, "grad_norm": 0.07577245682477951, "learning_rate": 0.01, "loss": 2.0287, "step": 29049 }, { "epoch": 2.9861239592969473, "grad_norm": 0.07917779684066772, "learning_rate": 0.01, "loss": 2.0148, "step": 29052 }, { "epoch": 2.986432315757015, "grad_norm": 0.07137954980134964, "learning_rate": 0.01, "loss": 2.0007, "step": 29055 }, { "epoch": 2.986740672217083, "grad_norm": 0.09875428676605225, "learning_rate": 0.01, "loss": 2.0242, "step": 29058 }, { "epoch": 2.9870490286771507, "grad_norm": 0.059274058789014816, "learning_rate": 0.01, "loss": 2.0212, "step": 29061 }, { "epoch": 2.9873573851372184, "grad_norm": 0.04235726222395897, "learning_rate": 0.01, "loss": 2.0178, "step": 29064 }, { "epoch": 2.9876657415972865, "grad_norm": 0.1677253097295761, "learning_rate": 0.01, "loss": 1.9921, "step": 29067 }, { "epoch": 2.987974098057354, "grad_norm": 0.05953408405184746, "learning_rate": 0.01, "loss": 2.0138, "step": 29070 }, { "epoch": 2.9882824545174222, "grad_norm": 0.031471315771341324, "learning_rate": 0.01, "loss": 1.9953, "step": 29073 }, { "epoch": 2.98859081097749, "grad_norm": 0.0490463487803936, "learning_rate": 0.01, "loss": 2.0193, "step": 29076 }, { "epoch": 2.988899167437558, "grad_norm": 0.0685456171631813, "learning_rate": 0.01, "loss": 2.0459, "step": 29079 }, { "epoch": 2.9892075238976257, "grad_norm": 0.04472583159804344, "learning_rate": 0.01, "loss": 1.9909, "step": 29082 }, { "epoch": 2.9895158803576933, "grad_norm": 0.03528788685798645, "learning_rate": 0.01, "loss": 2.0024, "step": 29085 }, { "epoch": 2.9898242368177614, "grad_norm": 0.04196497052907944, "learning_rate": 0.01, "loss": 2.0015, "step": 29088 }, { "epoch": 2.990132593277829, "grad_norm": 0.1347953975200653, "learning_rate": 0.01, "loss": 1.9983, "step": 29091 }, { "epoch": 2.9904409497378968, "grad_norm": 0.03483536094427109, "learning_rate": 0.01, "loss": 2.0038, "step": 29094 }, { "epoch": 2.990749306197965, "grad_norm": 0.04203316941857338, "learning_rate": 0.01, "loss": 2.0364, "step": 29097 }, { "epoch": 2.9910576626580325, "grad_norm": 0.039628706872463226, "learning_rate": 0.01, "loss": 2.0212, "step": 29100 }, { "epoch": 2.9913660191181006, "grad_norm": 0.04524881765246391, "learning_rate": 0.01, "loss": 2.0436, "step": 29103 }, { "epoch": 2.9916743755781683, "grad_norm": 0.0704786404967308, "learning_rate": 0.01, "loss": 2.0084, "step": 29106 }, { "epoch": 2.9919827320382364, "grad_norm": 0.06169109791517258, "learning_rate": 0.01, "loss": 1.9879, "step": 29109 }, { "epoch": 2.992291088498304, "grad_norm": 0.05929429456591606, "learning_rate": 0.01, "loss": 2.0341, "step": 29112 }, { "epoch": 2.9925994449583717, "grad_norm": 0.06046414375305176, "learning_rate": 0.01, "loss": 2.0212, "step": 29115 }, { "epoch": 2.99290780141844, "grad_norm": 0.03632686287164688, "learning_rate": 0.01, "loss": 2.0288, "step": 29118 }, { "epoch": 2.9932161578785075, "grad_norm": 0.0416223518550396, "learning_rate": 0.01, "loss": 1.9955, "step": 29121 }, { "epoch": 2.993524514338575, "grad_norm": 0.033993568271398544, "learning_rate": 0.01, "loss": 2.0373, "step": 29124 }, { "epoch": 2.9938328707986432, "grad_norm": 0.11413303017616272, "learning_rate": 0.01, "loss": 2.0219, "step": 29127 }, { "epoch": 2.9941412272587113, "grad_norm": 0.04512523114681244, "learning_rate": 0.01, "loss": 2.0197, "step": 29130 }, { "epoch": 2.994449583718779, "grad_norm": 0.08577805757522583, "learning_rate": 0.01, "loss": 2.0263, "step": 29133 }, { "epoch": 2.9947579401788467, "grad_norm": 0.09573300927877426, "learning_rate": 0.01, "loss": 2.0057, "step": 29136 }, { "epoch": 2.9950662966389148, "grad_norm": 0.04170147702097893, "learning_rate": 0.01, "loss": 2.0287, "step": 29139 }, { "epoch": 2.9953746530989824, "grad_norm": 0.04233024641871452, "learning_rate": 0.01, "loss": 2.0084, "step": 29142 }, { "epoch": 2.99568300955905, "grad_norm": 0.05406009405851364, "learning_rate": 0.01, "loss": 2.0249, "step": 29145 }, { "epoch": 2.995991366019118, "grad_norm": 0.037997808307409286, "learning_rate": 0.01, "loss": 2.0106, "step": 29148 }, { "epoch": 2.996299722479186, "grad_norm": 0.08010167628526688, "learning_rate": 0.01, "loss": 1.9899, "step": 29151 }, { "epoch": 2.9966080789392535, "grad_norm": 0.05372076854109764, "learning_rate": 0.01, "loss": 2.0406, "step": 29154 }, { "epoch": 2.9969164353993216, "grad_norm": 0.04186830669641495, "learning_rate": 0.01, "loss": 2.0047, "step": 29157 }, { "epoch": 2.9972247918593897, "grad_norm": 0.0803917944431305, "learning_rate": 0.01, "loss": 2.0106, "step": 29160 }, { "epoch": 2.9975331483194574, "grad_norm": 0.06086145341396332, "learning_rate": 0.01, "loss": 2.0016, "step": 29163 }, { "epoch": 2.997841504779525, "grad_norm": 0.1197366788983345, "learning_rate": 0.01, "loss": 2.0025, "step": 29166 }, { "epoch": 2.998149861239593, "grad_norm": 0.09424477070569992, "learning_rate": 0.01, "loss": 2.0032, "step": 29169 }, { "epoch": 2.998458217699661, "grad_norm": 0.05025864019989967, "learning_rate": 0.01, "loss": 2.0166, "step": 29172 }, { "epoch": 2.9987665741597285, "grad_norm": 0.0840819776058197, "learning_rate": 0.01, "loss": 1.9983, "step": 29175 }, { "epoch": 2.9990749306197966, "grad_norm": 0.03415597230195999, "learning_rate": 0.01, "loss": 2.0233, "step": 29178 }, { "epoch": 2.9993832870798642, "grad_norm": 0.09066252410411835, "learning_rate": 0.01, "loss": 2.0245, "step": 29181 }, { "epoch": 2.999691643539932, "grad_norm": 0.05019732564687729, "learning_rate": 0.01, "loss": 1.9893, "step": 29184 }, { "epoch": 3.0, "grad_norm": 0.09318925440311432, "learning_rate": 0.01, "loss": 2.0062, "step": 29187 }, { "epoch": 2.999383477188656, "grad_norm": 0.08568185567855835, "learning_rate": 0.01, "loss": 2.0409, "step": 29190 }, { "epoch": 2.9996917385943282, "grad_norm": 0.05479726567864418, "learning_rate": 0.01, "loss": 1.9992, "step": 29193 }, { "epoch": 3.0, "grad_norm": 0.0812709629535675, "learning_rate": 0.01, "loss": 2.0193, "step": 29196 }, { "epoch": 3.000308261405672, "grad_norm": 0.06423875689506531, "learning_rate": 0.01, "loss": 2.0037, "step": 29199 }, { "epoch": 3.000616522811344, "grad_norm": 0.06900475919246674, "learning_rate": 0.01, "loss": 2.0235, "step": 29202 }, { "epoch": 3.000924784217016, "grad_norm": 0.09736461192369461, "learning_rate": 0.01, "loss": 2.0269, "step": 29205 }, { "epoch": 3.001233045622688, "grad_norm": 0.10431299358606339, "learning_rate": 0.01, "loss": 2.0366, "step": 29208 }, { "epoch": 3.00154130702836, "grad_norm": 0.06412489712238312, "learning_rate": 0.01, "loss": 2.0336, "step": 29211 }, { "epoch": 3.0018495684340323, "grad_norm": 0.07305043190717697, "learning_rate": 0.01, "loss": 2.0355, "step": 29214 }, { "epoch": 3.002157829839704, "grad_norm": 0.07680006325244904, "learning_rate": 0.01, "loss": 2.031, "step": 29217 }, { "epoch": 3.0024660912453762, "grad_norm": 0.04416871443390846, "learning_rate": 0.01, "loss": 2.0228, "step": 29220 }, { "epoch": 3.002774352651048, "grad_norm": 0.05895330011844635, "learning_rate": 0.01, "loss": 2.0124, "step": 29223 }, { "epoch": 3.00308261405672, "grad_norm": 0.16763944923877716, "learning_rate": 0.01, "loss": 2.0521, "step": 29226 }, { "epoch": 3.003390875462392, "grad_norm": 0.04152580350637436, "learning_rate": 0.01, "loss": 2.0144, "step": 29229 }, { "epoch": 3.003699136868064, "grad_norm": 0.05650210753083229, "learning_rate": 0.01, "loss": 2.0373, "step": 29232 }, { "epoch": 3.0040073982737363, "grad_norm": 0.10183783620595932, "learning_rate": 0.01, "loss": 2.0374, "step": 29235 }, { "epoch": 3.004315659679408, "grad_norm": 0.11545984447002411, "learning_rate": 0.01, "loss": 2.0359, "step": 29238 }, { "epoch": 3.0046239210850803, "grad_norm": 0.07768990844488144, "learning_rate": 0.01, "loss": 2.0222, "step": 29241 }, { "epoch": 3.004932182490752, "grad_norm": 0.06256531924009323, "learning_rate": 0.01, "loss": 2.0332, "step": 29244 }, { "epoch": 3.005240443896424, "grad_norm": 0.041665416210889816, "learning_rate": 0.01, "loss": 2.0061, "step": 29247 }, { "epoch": 3.0055487053020964, "grad_norm": 0.051996082067489624, "learning_rate": 0.01, "loss": 2.0237, "step": 29250 }, { "epoch": 3.005856966707768, "grad_norm": 0.032815441489219666, "learning_rate": 0.01, "loss": 2.0203, "step": 29253 }, { "epoch": 3.0061652281134403, "grad_norm": 0.060963522642850876, "learning_rate": 0.01, "loss": 2.0293, "step": 29256 }, { "epoch": 3.006473489519112, "grad_norm": 0.11818858236074448, "learning_rate": 0.01, "loss": 2.0246, "step": 29259 }, { "epoch": 3.0067817509247843, "grad_norm": 0.08653881400823593, "learning_rate": 0.01, "loss": 2.0208, "step": 29262 }, { "epoch": 3.007090012330456, "grad_norm": 0.09629751741886139, "learning_rate": 0.01, "loss": 1.9929, "step": 29265 }, { "epoch": 3.0073982737361282, "grad_norm": 0.04238956794142723, "learning_rate": 0.01, "loss": 2.0112, "step": 29268 }, { "epoch": 3.0077065351418004, "grad_norm": 0.040573202073574066, "learning_rate": 0.01, "loss": 2.0159, "step": 29271 }, { "epoch": 3.008014796547472, "grad_norm": 0.08061878383159637, "learning_rate": 0.01, "loss": 2.0562, "step": 29274 }, { "epoch": 3.0083230579531444, "grad_norm": 0.06944199651479721, "learning_rate": 0.01, "loss": 2.0077, "step": 29277 }, { "epoch": 3.008631319358816, "grad_norm": 0.06629909574985504, "learning_rate": 0.01, "loss": 1.9934, "step": 29280 }, { "epoch": 3.0089395807644883, "grad_norm": 0.06654530018568039, "learning_rate": 0.01, "loss": 2.0018, "step": 29283 }, { "epoch": 3.0092478421701605, "grad_norm": 0.08806800842285156, "learning_rate": 0.01, "loss": 2.0033, "step": 29286 }, { "epoch": 3.0095561035758323, "grad_norm": 0.12367472797632217, "learning_rate": 0.01, "loss": 2.0382, "step": 29289 }, { "epoch": 3.0098643649815044, "grad_norm": 0.052121471613645554, "learning_rate": 0.01, "loss": 2.0278, "step": 29292 }, { "epoch": 3.010172626387176, "grad_norm": 0.0409090481698513, "learning_rate": 0.01, "loss": 2.0146, "step": 29295 }, { "epoch": 3.0104808877928484, "grad_norm": 0.09956279397010803, "learning_rate": 0.01, "loss": 2.0339, "step": 29298 }, { "epoch": 3.01078914919852, "grad_norm": 0.04090806096792221, "learning_rate": 0.01, "loss": 2.031, "step": 29301 }, { "epoch": 3.0110974106041923, "grad_norm": 0.06308458000421524, "learning_rate": 0.01, "loss": 2.0347, "step": 29304 }, { "epoch": 3.0114056720098645, "grad_norm": 0.05140318349003792, "learning_rate": 0.01, "loss": 2.0341, "step": 29307 }, { "epoch": 3.0117139334155363, "grad_norm": 0.0383441224694252, "learning_rate": 0.01, "loss": 2.0167, "step": 29310 }, { "epoch": 3.0120221948212085, "grad_norm": 0.06803669035434723, "learning_rate": 0.01, "loss": 2.0442, "step": 29313 }, { "epoch": 3.0123304562268802, "grad_norm": 0.042336028069257736, "learning_rate": 0.01, "loss": 2.018, "step": 29316 }, { "epoch": 3.0126387176325524, "grad_norm": 0.052575305104255676, "learning_rate": 0.01, "loss": 1.9956, "step": 29319 }, { "epoch": 3.0129469790382246, "grad_norm": 0.04428831860423088, "learning_rate": 0.01, "loss": 2.0225, "step": 29322 }, { "epoch": 3.0132552404438964, "grad_norm": 0.03720409423112869, "learning_rate": 0.01, "loss": 2.0135, "step": 29325 }, { "epoch": 3.0135635018495686, "grad_norm": 0.03491399809718132, "learning_rate": 0.01, "loss": 1.9993, "step": 29328 }, { "epoch": 3.0138717632552403, "grad_norm": 0.11868512630462646, "learning_rate": 0.01, "loss": 2.014, "step": 29331 }, { "epoch": 3.0141800246609125, "grad_norm": 0.05709204822778702, "learning_rate": 0.01, "loss": 2.0043, "step": 29334 }, { "epoch": 3.0144882860665843, "grad_norm": 0.09501231461763382, "learning_rate": 0.01, "loss": 2.0056, "step": 29337 }, { "epoch": 3.0147965474722564, "grad_norm": 0.04431547597050667, "learning_rate": 0.01, "loss": 2.0223, "step": 29340 }, { "epoch": 3.0151048088779286, "grad_norm": 0.07580556720495224, "learning_rate": 0.01, "loss": 2.0343, "step": 29343 }, { "epoch": 3.0154130702836004, "grad_norm": 0.05567536503076553, "learning_rate": 0.01, "loss": 2.0327, "step": 29346 }, { "epoch": 3.0157213316892726, "grad_norm": 0.03637940436601639, "learning_rate": 0.01, "loss": 2.0366, "step": 29349 }, { "epoch": 3.0160295930949443, "grad_norm": 0.07379139959812164, "learning_rate": 0.01, "loss": 2.0196, "step": 29352 }, { "epoch": 3.0163378545006165, "grad_norm": 0.08311998844146729, "learning_rate": 0.01, "loss": 2.0013, "step": 29355 }, { "epoch": 3.0166461159062887, "grad_norm": 0.11170487850904465, "learning_rate": 0.01, "loss": 2.0174, "step": 29358 }, { "epoch": 3.0169543773119605, "grad_norm": 0.055827848613262177, "learning_rate": 0.01, "loss": 2.0184, "step": 29361 }, { "epoch": 3.0172626387176327, "grad_norm": 0.06052641570568085, "learning_rate": 0.01, "loss": 2.0238, "step": 29364 }, { "epoch": 3.0175709001233044, "grad_norm": 0.03150554001331329, "learning_rate": 0.01, "loss": 2.024, "step": 29367 }, { "epoch": 3.0178791615289766, "grad_norm": 0.06298622488975525, "learning_rate": 0.01, "loss": 2.0132, "step": 29370 }, { "epoch": 3.0181874229346484, "grad_norm": 0.09742710739374161, "learning_rate": 0.01, "loss": 2.0411, "step": 29373 }, { "epoch": 3.0184956843403206, "grad_norm": 0.07219108939170837, "learning_rate": 0.01, "loss": 2.0113, "step": 29376 }, { "epoch": 3.0188039457459928, "grad_norm": 0.05558139458298683, "learning_rate": 0.01, "loss": 2.0075, "step": 29379 }, { "epoch": 3.0191122071516645, "grad_norm": 0.05763263627886772, "learning_rate": 0.01, "loss": 2.0433, "step": 29382 }, { "epoch": 3.0194204685573367, "grad_norm": 0.04832174628973007, "learning_rate": 0.01, "loss": 2.007, "step": 29385 }, { "epoch": 3.0197287299630085, "grad_norm": 0.03823337331414223, "learning_rate": 0.01, "loss": 2.0307, "step": 29388 }, { "epoch": 3.0200369913686806, "grad_norm": 0.05253903195261955, "learning_rate": 0.01, "loss": 2.0018, "step": 29391 }, { "epoch": 3.020345252774353, "grad_norm": 0.10889255255460739, "learning_rate": 0.01, "loss": 2.0106, "step": 29394 }, { "epoch": 3.0206535141800246, "grad_norm": 0.04247021675109863, "learning_rate": 0.01, "loss": 2.0466, "step": 29397 }, { "epoch": 3.020961775585697, "grad_norm": 0.09347319602966309, "learning_rate": 0.01, "loss": 2.037, "step": 29400 }, { "epoch": 3.0212700369913685, "grad_norm": 0.05651739612221718, "learning_rate": 0.01, "loss": 2.0385, "step": 29403 }, { "epoch": 3.0215782983970407, "grad_norm": 0.0666181743144989, "learning_rate": 0.01, "loss": 2.0274, "step": 29406 }, { "epoch": 3.021886559802713, "grad_norm": 0.053186848759651184, "learning_rate": 0.01, "loss": 2.0127, "step": 29409 }, { "epoch": 3.0221948212083847, "grad_norm": 0.05201537534594536, "learning_rate": 0.01, "loss": 2.0009, "step": 29412 }, { "epoch": 3.022503082614057, "grad_norm": 0.02726483717560768, "learning_rate": 0.01, "loss": 2.0135, "step": 29415 }, { "epoch": 3.0228113440197286, "grad_norm": 0.10047302395105362, "learning_rate": 0.01, "loss": 2.0336, "step": 29418 }, { "epoch": 3.023119605425401, "grad_norm": 0.05461571365594864, "learning_rate": 0.01, "loss": 2.0213, "step": 29421 }, { "epoch": 3.0234278668310726, "grad_norm": 0.08373844623565674, "learning_rate": 0.01, "loss": 2.0246, "step": 29424 }, { "epoch": 3.0237361282367448, "grad_norm": 0.045885663479566574, "learning_rate": 0.01, "loss": 2.044, "step": 29427 }, { "epoch": 3.024044389642417, "grad_norm": 0.054790932685136795, "learning_rate": 0.01, "loss": 2.0387, "step": 29430 }, { "epoch": 3.0243526510480887, "grad_norm": 0.04917832091450691, "learning_rate": 0.01, "loss": 2.025, "step": 29433 }, { "epoch": 3.024660912453761, "grad_norm": 0.09375031292438507, "learning_rate": 0.01, "loss": 2.0482, "step": 29436 }, { "epoch": 3.0249691738594326, "grad_norm": 0.051234230399131775, "learning_rate": 0.01, "loss": 2.0024, "step": 29439 }, { "epoch": 3.025277435265105, "grad_norm": 0.057380858808755875, "learning_rate": 0.01, "loss": 2.0011, "step": 29442 }, { "epoch": 3.025585696670777, "grad_norm": 0.060605574399232864, "learning_rate": 0.01, "loss": 2.0158, "step": 29445 }, { "epoch": 3.025893958076449, "grad_norm": 0.09125442057847977, "learning_rate": 0.01, "loss": 2.0284, "step": 29448 }, { "epoch": 3.026202219482121, "grad_norm": 0.04081615433096886, "learning_rate": 0.01, "loss": 2.0029, "step": 29451 }, { "epoch": 3.0265104808877927, "grad_norm": 0.05347365140914917, "learning_rate": 0.01, "loss": 2.0319, "step": 29454 }, { "epoch": 3.026818742293465, "grad_norm": 0.04458871856331825, "learning_rate": 0.01, "loss": 2.0035, "step": 29457 }, { "epoch": 3.0271270036991367, "grad_norm": 0.04996645078063011, "learning_rate": 0.01, "loss": 2.0046, "step": 29460 }, { "epoch": 3.027435265104809, "grad_norm": 0.051296427845954895, "learning_rate": 0.01, "loss": 2.0099, "step": 29463 }, { "epoch": 3.027743526510481, "grad_norm": 0.05448664352297783, "learning_rate": 0.01, "loss": 2.0082, "step": 29466 }, { "epoch": 3.028051787916153, "grad_norm": 0.0587022639811039, "learning_rate": 0.01, "loss": 2.0083, "step": 29469 }, { "epoch": 3.028360049321825, "grad_norm": 0.0711282268166542, "learning_rate": 0.01, "loss": 1.9997, "step": 29472 }, { "epoch": 3.0286683107274968, "grad_norm": 0.14440664649009705, "learning_rate": 0.01, "loss": 2.0525, "step": 29475 }, { "epoch": 3.028976572133169, "grad_norm": 0.06147081404924393, "learning_rate": 0.01, "loss": 1.9979, "step": 29478 }, { "epoch": 3.029284833538841, "grad_norm": 0.0842541828751564, "learning_rate": 0.01, "loss": 2.0075, "step": 29481 }, { "epoch": 3.029593094944513, "grad_norm": 0.04915475845336914, "learning_rate": 0.01, "loss": 2.029, "step": 29484 }, { "epoch": 3.029901356350185, "grad_norm": 0.04254012182354927, "learning_rate": 0.01, "loss": 2.0406, "step": 29487 }, { "epoch": 3.030209617755857, "grad_norm": 0.03716140240430832, "learning_rate": 0.01, "loss": 2.0477, "step": 29490 }, { "epoch": 3.030517879161529, "grad_norm": 0.09156777709722519, "learning_rate": 0.01, "loss": 2.0178, "step": 29493 }, { "epoch": 3.030826140567201, "grad_norm": 0.05401970446109772, "learning_rate": 0.01, "loss": 2.005, "step": 29496 }, { "epoch": 3.031134401972873, "grad_norm": 0.11564016342163086, "learning_rate": 0.01, "loss": 2.0344, "step": 29499 }, { "epoch": 3.031442663378545, "grad_norm": 0.12813927233219147, "learning_rate": 0.01, "loss": 2.0159, "step": 29502 }, { "epoch": 3.031750924784217, "grad_norm": 0.04971994087100029, "learning_rate": 0.01, "loss": 2.0122, "step": 29505 }, { "epoch": 3.032059186189889, "grad_norm": 0.037013307213783264, "learning_rate": 0.01, "loss": 2.0165, "step": 29508 }, { "epoch": 3.032367447595561, "grad_norm": 0.048204224556684494, "learning_rate": 0.01, "loss": 2.0635, "step": 29511 }, { "epoch": 3.032675709001233, "grad_norm": 0.034393493086099625, "learning_rate": 0.01, "loss": 2.0283, "step": 29514 }, { "epoch": 3.0329839704069053, "grad_norm": 0.041031621396541595, "learning_rate": 0.01, "loss": 2.0062, "step": 29517 }, { "epoch": 3.033292231812577, "grad_norm": 0.049610815942287445, "learning_rate": 0.01, "loss": 2.015, "step": 29520 }, { "epoch": 3.033600493218249, "grad_norm": 0.07062069326639175, "learning_rate": 0.01, "loss": 2.0216, "step": 29523 }, { "epoch": 3.033908754623921, "grad_norm": 0.040892720222473145, "learning_rate": 0.01, "loss": 2.0223, "step": 29526 }, { "epoch": 3.034217016029593, "grad_norm": 0.036804720759391785, "learning_rate": 0.01, "loss": 2.0303, "step": 29529 }, { "epoch": 3.034525277435265, "grad_norm": 0.06749982386827469, "learning_rate": 0.01, "loss": 2.0267, "step": 29532 }, { "epoch": 3.034833538840937, "grad_norm": 0.07996654510498047, "learning_rate": 0.01, "loss": 2.0587, "step": 29535 }, { "epoch": 3.0351418002466093, "grad_norm": 0.05621659383177757, "learning_rate": 0.01, "loss": 2.0119, "step": 29538 }, { "epoch": 3.035450061652281, "grad_norm": 0.04464147612452507, "learning_rate": 0.01, "loss": 2.0308, "step": 29541 }, { "epoch": 3.0357583230579532, "grad_norm": 0.04619302973151207, "learning_rate": 0.01, "loss": 2.0028, "step": 29544 }, { "epoch": 3.036066584463625, "grad_norm": 0.09262579679489136, "learning_rate": 0.01, "loss": 2.0457, "step": 29547 }, { "epoch": 3.036374845869297, "grad_norm": 0.07954932749271393, "learning_rate": 0.01, "loss": 1.9981, "step": 29550 }, { "epoch": 3.0366831072749694, "grad_norm": 0.08746150135993958, "learning_rate": 0.01, "loss": 2.0072, "step": 29553 }, { "epoch": 3.036991368680641, "grad_norm": 0.04415113106369972, "learning_rate": 0.01, "loss": 2.0113, "step": 29556 }, { "epoch": 3.0372996300863133, "grad_norm": 0.08307299762964249, "learning_rate": 0.01, "loss": 2.0329, "step": 29559 }, { "epoch": 3.037607891491985, "grad_norm": 0.05882325395941734, "learning_rate": 0.01, "loss": 2.0094, "step": 29562 }, { "epoch": 3.0379161528976573, "grad_norm": 0.08524999022483826, "learning_rate": 0.01, "loss": 2.0039, "step": 29565 }, { "epoch": 3.038224414303329, "grad_norm": 0.07542447000741959, "learning_rate": 0.01, "loss": 2.0156, "step": 29568 }, { "epoch": 3.038532675709001, "grad_norm": 0.06394769251346588, "learning_rate": 0.01, "loss": 2.0551, "step": 29571 }, { "epoch": 3.0388409371146734, "grad_norm": 0.07811316847801208, "learning_rate": 0.01, "loss": 2.0198, "step": 29574 }, { "epoch": 3.039149198520345, "grad_norm": 0.0849740207195282, "learning_rate": 0.01, "loss": 2.0276, "step": 29577 }, { "epoch": 3.0394574599260173, "grad_norm": 0.05540047585964203, "learning_rate": 0.01, "loss": 2.0383, "step": 29580 }, { "epoch": 3.039765721331689, "grad_norm": 0.044082462787628174, "learning_rate": 0.01, "loss": 1.9947, "step": 29583 }, { "epoch": 3.0400739827373613, "grad_norm": 0.11179853975772858, "learning_rate": 0.01, "loss": 2.011, "step": 29586 }, { "epoch": 3.0403822441430335, "grad_norm": 0.10160455852746964, "learning_rate": 0.01, "loss": 2.0303, "step": 29589 }, { "epoch": 3.0406905055487052, "grad_norm": 0.08072539418935776, "learning_rate": 0.01, "loss": 2.0351, "step": 29592 }, { "epoch": 3.0409987669543774, "grad_norm": 0.039637934416532516, "learning_rate": 0.01, "loss": 2.0304, "step": 29595 }, { "epoch": 3.041307028360049, "grad_norm": 0.05847008153796196, "learning_rate": 0.01, "loss": 2.02, "step": 29598 }, { "epoch": 3.0416152897657214, "grad_norm": 0.04309094697237015, "learning_rate": 0.01, "loss": 2.0445, "step": 29601 }, { "epoch": 3.041923551171393, "grad_norm": 0.04230069741606712, "learning_rate": 0.01, "loss": 2.0014, "step": 29604 }, { "epoch": 3.0422318125770653, "grad_norm": 0.08122226595878601, "learning_rate": 0.01, "loss": 2.0214, "step": 29607 }, { "epoch": 3.0425400739827375, "grad_norm": 0.05134423449635506, "learning_rate": 0.01, "loss": 1.9923, "step": 29610 }, { "epoch": 3.0428483353884093, "grad_norm": 0.06343290954828262, "learning_rate": 0.01, "loss": 2.0146, "step": 29613 }, { "epoch": 3.0431565967940815, "grad_norm": 0.04415202513337135, "learning_rate": 0.01, "loss": 2.0189, "step": 29616 }, { "epoch": 3.043464858199753, "grad_norm": 0.09160872548818588, "learning_rate": 0.01, "loss": 2.0045, "step": 29619 }, { "epoch": 3.0437731196054254, "grad_norm": 0.0800345167517662, "learning_rate": 0.01, "loss": 2.0214, "step": 29622 }, { "epoch": 3.0440813810110976, "grad_norm": 0.11805953085422516, "learning_rate": 0.01, "loss": 2.0165, "step": 29625 }, { "epoch": 3.0443896424167693, "grad_norm": 0.036926135420799255, "learning_rate": 0.01, "loss": 2.0115, "step": 29628 }, { "epoch": 3.0446979038224415, "grad_norm": 0.04666229337453842, "learning_rate": 0.01, "loss": 2.0178, "step": 29631 }, { "epoch": 3.0450061652281133, "grad_norm": 0.152203768491745, "learning_rate": 0.01, "loss": 2.0173, "step": 29634 }, { "epoch": 3.0453144266337855, "grad_norm": 0.07508762180805206, "learning_rate": 0.01, "loss": 2.0276, "step": 29637 }, { "epoch": 3.0456226880394572, "grad_norm": 0.07548423856496811, "learning_rate": 0.01, "loss": 2.006, "step": 29640 }, { "epoch": 3.0459309494451294, "grad_norm": 0.03427097201347351, "learning_rate": 0.01, "loss": 2.0229, "step": 29643 }, { "epoch": 3.0462392108508016, "grad_norm": 0.0399865061044693, "learning_rate": 0.01, "loss": 2.012, "step": 29646 }, { "epoch": 3.0465474722564734, "grad_norm": 0.03335277736186981, "learning_rate": 0.01, "loss": 2.0083, "step": 29649 }, { "epoch": 3.0468557336621456, "grad_norm": 0.0520477369427681, "learning_rate": 0.01, "loss": 2.0389, "step": 29652 }, { "epoch": 3.0471639950678173, "grad_norm": 0.08565925061702728, "learning_rate": 0.01, "loss": 2.0088, "step": 29655 }, { "epoch": 3.0474722564734895, "grad_norm": 0.062498703598976135, "learning_rate": 0.01, "loss": 1.9971, "step": 29658 }, { "epoch": 3.0477805178791617, "grad_norm": 0.08171094208955765, "learning_rate": 0.01, "loss": 1.9849, "step": 29661 }, { "epoch": 3.0480887792848335, "grad_norm": 0.06803334504365921, "learning_rate": 0.01, "loss": 2.0127, "step": 29664 }, { "epoch": 3.0483970406905057, "grad_norm": 0.113568514585495, "learning_rate": 0.01, "loss": 2.0074, "step": 29667 }, { "epoch": 3.0487053020961774, "grad_norm": 0.049806151539087296, "learning_rate": 0.01, "loss": 2.0144, "step": 29670 }, { "epoch": 3.0490135635018496, "grad_norm": 0.05908326804637909, "learning_rate": 0.01, "loss": 2.0229, "step": 29673 }, { "epoch": 3.049321824907522, "grad_norm": 0.0421968549489975, "learning_rate": 0.01, "loss": 2.038, "step": 29676 }, { "epoch": 3.0496300863131935, "grad_norm": 0.04743592441082001, "learning_rate": 0.01, "loss": 2.0126, "step": 29679 }, { "epoch": 3.0499383477188657, "grad_norm": 0.11457180231809616, "learning_rate": 0.01, "loss": 2.0057, "step": 29682 }, { "epoch": 3.0502466091245375, "grad_norm": 0.052697159349918365, "learning_rate": 0.01, "loss": 2.0276, "step": 29685 }, { "epoch": 3.0505548705302097, "grad_norm": 0.140494704246521, "learning_rate": 0.01, "loss": 2.0083, "step": 29688 }, { "epoch": 3.0508631319358814, "grad_norm": 0.03548673912882805, "learning_rate": 0.01, "loss": 1.9985, "step": 29691 }, { "epoch": 3.0511713933415536, "grad_norm": 0.03223096579313278, "learning_rate": 0.01, "loss": 2.0254, "step": 29694 }, { "epoch": 3.051479654747226, "grad_norm": 0.040596138685941696, "learning_rate": 0.01, "loss": 2.0179, "step": 29697 }, { "epoch": 3.0517879161528976, "grad_norm": 0.09559677541255951, "learning_rate": 0.01, "loss": 2.0298, "step": 29700 }, { "epoch": 3.0520961775585698, "grad_norm": 0.10038384050130844, "learning_rate": 0.01, "loss": 1.9998, "step": 29703 }, { "epoch": 3.0524044389642415, "grad_norm": 0.06066306680440903, "learning_rate": 0.01, "loss": 2.0395, "step": 29706 }, { "epoch": 3.0527127003699137, "grad_norm": 0.08278308808803558, "learning_rate": 0.01, "loss": 2.0231, "step": 29709 }, { "epoch": 3.053020961775586, "grad_norm": 0.05971555784344673, "learning_rate": 0.01, "loss": 2.0275, "step": 29712 }, { "epoch": 3.0533292231812577, "grad_norm": 0.036974966526031494, "learning_rate": 0.01, "loss": 2.0315, "step": 29715 }, { "epoch": 3.05363748458693, "grad_norm": 0.0310160294175148, "learning_rate": 0.01, "loss": 2.0049, "step": 29718 }, { "epoch": 3.0539457459926016, "grad_norm": 0.03587731346487999, "learning_rate": 0.01, "loss": 2.019, "step": 29721 }, { "epoch": 3.054254007398274, "grad_norm": 0.05497679486870766, "learning_rate": 0.01, "loss": 2.0321, "step": 29724 }, { "epoch": 3.0545622688039455, "grad_norm": 0.06713774055242538, "learning_rate": 0.01, "loss": 1.9943, "step": 29727 }, { "epoch": 3.0548705302096177, "grad_norm": 0.034498222172260284, "learning_rate": 0.01, "loss": 2.0304, "step": 29730 }, { "epoch": 3.05517879161529, "grad_norm": 0.12036336213350296, "learning_rate": 0.01, "loss": 2.0249, "step": 29733 }, { "epoch": 3.0554870530209617, "grad_norm": 0.08315537869930267, "learning_rate": 0.01, "loss": 2.0003, "step": 29736 }, { "epoch": 3.055795314426634, "grad_norm": 0.041693978011608124, "learning_rate": 0.01, "loss": 2.0117, "step": 29739 }, { "epoch": 3.0561035758323056, "grad_norm": 0.08889281749725342, "learning_rate": 0.01, "loss": 2.0256, "step": 29742 }, { "epoch": 3.056411837237978, "grad_norm": 0.07068068534135818, "learning_rate": 0.01, "loss": 2.0018, "step": 29745 }, { "epoch": 3.05672009864365, "grad_norm": 0.09744048863649368, "learning_rate": 0.01, "loss": 2.001, "step": 29748 }, { "epoch": 3.0570283600493218, "grad_norm": 0.04529158025979996, "learning_rate": 0.01, "loss": 2.0084, "step": 29751 }, { "epoch": 3.057336621454994, "grad_norm": 0.041666992008686066, "learning_rate": 0.01, "loss": 2.031, "step": 29754 }, { "epoch": 3.0576448828606657, "grad_norm": 0.07549773156642914, "learning_rate": 0.01, "loss": 2.0274, "step": 29757 }, { "epoch": 3.057953144266338, "grad_norm": 0.046318188309669495, "learning_rate": 0.01, "loss": 2.0132, "step": 29760 }, { "epoch": 3.0582614056720097, "grad_norm": 0.05954563990235329, "learning_rate": 0.01, "loss": 2.01, "step": 29763 }, { "epoch": 3.058569667077682, "grad_norm": 0.03786987066268921, "learning_rate": 0.01, "loss": 1.9981, "step": 29766 }, { "epoch": 3.058877928483354, "grad_norm": 0.04249919578433037, "learning_rate": 0.01, "loss": 2.0168, "step": 29769 }, { "epoch": 3.059186189889026, "grad_norm": 0.09349595755338669, "learning_rate": 0.01, "loss": 2.0365, "step": 29772 }, { "epoch": 3.059494451294698, "grad_norm": 0.05637031048536301, "learning_rate": 0.01, "loss": 1.9966, "step": 29775 }, { "epoch": 3.0598027127003697, "grad_norm": 0.09952693432569504, "learning_rate": 0.01, "loss": 2.0295, "step": 29778 }, { "epoch": 3.060110974106042, "grad_norm": 0.10634247213602066, "learning_rate": 0.01, "loss": 2.0425, "step": 29781 }, { "epoch": 3.060419235511714, "grad_norm": 0.0774141326546669, "learning_rate": 0.01, "loss": 2.0222, "step": 29784 }, { "epoch": 3.060727496917386, "grad_norm": 0.04012331739068031, "learning_rate": 0.01, "loss": 2.021, "step": 29787 }, { "epoch": 3.061035758323058, "grad_norm": 0.04289887100458145, "learning_rate": 0.01, "loss": 2.0115, "step": 29790 }, { "epoch": 3.06134401972873, "grad_norm": 0.03678658604621887, "learning_rate": 0.01, "loss": 2.0269, "step": 29793 }, { "epoch": 3.061652281134402, "grad_norm": 0.034676194190979004, "learning_rate": 0.01, "loss": 2.0426, "step": 29796 }, { "epoch": 3.0619605425400738, "grad_norm": 0.08951381593942642, "learning_rate": 0.01, "loss": 2.001, "step": 29799 }, { "epoch": 3.062268803945746, "grad_norm": 0.053729988634586334, "learning_rate": 0.01, "loss": 2.0108, "step": 29802 }, { "epoch": 3.062577065351418, "grad_norm": 0.08025490492582321, "learning_rate": 0.01, "loss": 2.0091, "step": 29805 }, { "epoch": 3.06288532675709, "grad_norm": 0.11099457740783691, "learning_rate": 0.01, "loss": 2.0134, "step": 29808 }, { "epoch": 3.063193588162762, "grad_norm": 0.13354651629924774, "learning_rate": 0.01, "loss": 2.0417, "step": 29811 }, { "epoch": 3.063501849568434, "grad_norm": 0.07378706336021423, "learning_rate": 0.01, "loss": 2.0169, "step": 29814 }, { "epoch": 3.063810110974106, "grad_norm": 0.04699387028813362, "learning_rate": 0.01, "loss": 2.0211, "step": 29817 }, { "epoch": 3.0641183723797782, "grad_norm": 0.08287378400564194, "learning_rate": 0.01, "loss": 2.0414, "step": 29820 }, { "epoch": 3.06442663378545, "grad_norm": 0.04004284739494324, "learning_rate": 0.01, "loss": 2.0275, "step": 29823 }, { "epoch": 3.064734895191122, "grad_norm": 0.05801365152001381, "learning_rate": 0.01, "loss": 2.0315, "step": 29826 }, { "epoch": 3.065043156596794, "grad_norm": 0.05196039378643036, "learning_rate": 0.01, "loss": 2.0193, "step": 29829 }, { "epoch": 3.065351418002466, "grad_norm": 0.04543706774711609, "learning_rate": 0.01, "loss": 2.0032, "step": 29832 }, { "epoch": 3.065659679408138, "grad_norm": 0.05413155257701874, "learning_rate": 0.01, "loss": 2.0127, "step": 29835 }, { "epoch": 3.06596794081381, "grad_norm": 0.05158795788884163, "learning_rate": 0.01, "loss": 2.0193, "step": 29838 }, { "epoch": 3.0662762022194823, "grad_norm": 0.05547649413347244, "learning_rate": 0.01, "loss": 2.0217, "step": 29841 }, { "epoch": 3.066584463625154, "grad_norm": 0.04968711733818054, "learning_rate": 0.01, "loss": 2.0332, "step": 29844 }, { "epoch": 3.066892725030826, "grad_norm": 0.05625564232468605, "learning_rate": 0.01, "loss": 2.0115, "step": 29847 }, { "epoch": 3.067200986436498, "grad_norm": 0.05007552728056908, "learning_rate": 0.01, "loss": 2.022, "step": 29850 }, { "epoch": 3.06750924784217, "grad_norm": 0.08748306334018707, "learning_rate": 0.01, "loss": 2.0372, "step": 29853 }, { "epoch": 3.0678175092478424, "grad_norm": 0.08939902484416962, "learning_rate": 0.01, "loss": 2.0279, "step": 29856 }, { "epoch": 3.068125770653514, "grad_norm": 0.04412224516272545, "learning_rate": 0.01, "loss": 2.0039, "step": 29859 }, { "epoch": 3.0684340320591863, "grad_norm": 0.04574199765920639, "learning_rate": 0.01, "loss": 2.0204, "step": 29862 }, { "epoch": 3.068742293464858, "grad_norm": 0.059090469032526016, "learning_rate": 0.01, "loss": 2.0097, "step": 29865 }, { "epoch": 3.0690505548705302, "grad_norm": 0.048365235328674316, "learning_rate": 0.01, "loss": 1.988, "step": 29868 }, { "epoch": 3.0693588162762024, "grad_norm": 0.05044705048203468, "learning_rate": 0.01, "loss": 2.0242, "step": 29871 }, { "epoch": 3.069667077681874, "grad_norm": 0.06108652427792549, "learning_rate": 0.01, "loss": 2.0223, "step": 29874 }, { "epoch": 3.0699753390875464, "grad_norm": 0.04400194063782692, "learning_rate": 0.01, "loss": 2.0188, "step": 29877 }, { "epoch": 3.070283600493218, "grad_norm": 0.047024693340063095, "learning_rate": 0.01, "loss": 2.0298, "step": 29880 }, { "epoch": 3.0705918618988903, "grad_norm": 0.06958126276731491, "learning_rate": 0.01, "loss": 2.0122, "step": 29883 }, { "epoch": 3.070900123304562, "grad_norm": 0.07789502292871475, "learning_rate": 0.01, "loss": 2.0022, "step": 29886 }, { "epoch": 3.0712083847102343, "grad_norm": 0.06438666582107544, "learning_rate": 0.01, "loss": 2.035, "step": 29889 }, { "epoch": 3.0715166461159065, "grad_norm": 0.1446814239025116, "learning_rate": 0.01, "loss": 1.9922, "step": 29892 }, { "epoch": 3.071824907521578, "grad_norm": 0.055988240987062454, "learning_rate": 0.01, "loss": 2.0204, "step": 29895 }, { "epoch": 3.0721331689272504, "grad_norm": 0.04055717960000038, "learning_rate": 0.01, "loss": 2.025, "step": 29898 }, { "epoch": 3.072441430332922, "grad_norm": 0.11918565630912781, "learning_rate": 0.01, "loss": 2.0436, "step": 29901 }, { "epoch": 3.0727496917385944, "grad_norm": 0.04726850986480713, "learning_rate": 0.01, "loss": 2.0313, "step": 29904 }, { "epoch": 3.0730579531442666, "grad_norm": 0.0526423342525959, "learning_rate": 0.01, "loss": 2.0541, "step": 29907 }, { "epoch": 3.0733662145499383, "grad_norm": 0.048728957772254944, "learning_rate": 0.01, "loss": 2.0134, "step": 29910 }, { "epoch": 3.0736744759556105, "grad_norm": 0.036348532885313034, "learning_rate": 0.01, "loss": 1.9963, "step": 29913 }, { "epoch": 3.0739827373612822, "grad_norm": 0.08039457350969315, "learning_rate": 0.01, "loss": 1.9541, "step": 29916 }, { "epoch": 3.0742909987669544, "grad_norm": 0.05370686575770378, "learning_rate": 0.01, "loss": 2.0306, "step": 29919 }, { "epoch": 3.074599260172626, "grad_norm": 0.08430914580821991, "learning_rate": 0.01, "loss": 2.0159, "step": 29922 }, { "epoch": 3.0749075215782984, "grad_norm": 0.04675029218196869, "learning_rate": 0.01, "loss": 1.9867, "step": 29925 }, { "epoch": 3.0752157829839706, "grad_norm": 0.0455501526594162, "learning_rate": 0.01, "loss": 2.0117, "step": 29928 }, { "epoch": 3.0755240443896423, "grad_norm": 0.04330252856016159, "learning_rate": 0.01, "loss": 2.0166, "step": 29931 }, { "epoch": 3.0758323057953145, "grad_norm": 0.04302576184272766, "learning_rate": 0.01, "loss": 2.0252, "step": 29934 }, { "epoch": 3.0761405672009863, "grad_norm": 0.07246873527765274, "learning_rate": 0.01, "loss": 2.0118, "step": 29937 }, { "epoch": 3.0764488286066585, "grad_norm": 0.12140212953090668, "learning_rate": 0.01, "loss": 2.0384, "step": 29940 }, { "epoch": 3.0767570900123307, "grad_norm": 0.050940465182065964, "learning_rate": 0.01, "loss": 2.0315, "step": 29943 }, { "epoch": 3.0770653514180024, "grad_norm": 0.08281772583723068, "learning_rate": 0.01, "loss": 2.0209, "step": 29946 }, { "epoch": 3.0773736128236746, "grad_norm": 0.05485936999320984, "learning_rate": 0.01, "loss": 2.0086, "step": 29949 }, { "epoch": 3.0776818742293464, "grad_norm": 0.0354488380253315, "learning_rate": 0.01, "loss": 2.0127, "step": 29952 }, { "epoch": 3.0779901356350186, "grad_norm": 0.04454338923096657, "learning_rate": 0.01, "loss": 2.0225, "step": 29955 }, { "epoch": 3.0782983970406903, "grad_norm": 0.07980217784643173, "learning_rate": 0.01, "loss": 2.0178, "step": 29958 }, { "epoch": 3.0786066584463625, "grad_norm": 0.07601354271173477, "learning_rate": 0.01, "loss": 2.0062, "step": 29961 }, { "epoch": 3.0789149198520347, "grad_norm": 0.036425743252038956, "learning_rate": 0.01, "loss": 2.0094, "step": 29964 }, { "epoch": 3.0792231812577064, "grad_norm": 0.05909854918718338, "learning_rate": 0.01, "loss": 2.0291, "step": 29967 }, { "epoch": 3.0795314426633786, "grad_norm": 0.038156334310770035, "learning_rate": 0.01, "loss": 2.0146, "step": 29970 }, { "epoch": 3.0798397040690504, "grad_norm": 0.03241376578807831, "learning_rate": 0.01, "loss": 2.0132, "step": 29973 }, { "epoch": 3.0801479654747226, "grad_norm": 0.039934635162353516, "learning_rate": 0.01, "loss": 2.0315, "step": 29976 }, { "epoch": 3.0804562268803948, "grad_norm": 0.05287212133407593, "learning_rate": 0.01, "loss": 2.0126, "step": 29979 }, { "epoch": 3.0807644882860665, "grad_norm": 0.0807357057929039, "learning_rate": 0.01, "loss": 2.0162, "step": 29982 }, { "epoch": 3.0810727496917387, "grad_norm": 0.07264538109302521, "learning_rate": 0.01, "loss": 2.0103, "step": 29985 }, { "epoch": 3.0813810110974105, "grad_norm": 0.06328427046537399, "learning_rate": 0.01, "loss": 2.0377, "step": 29988 }, { "epoch": 3.0816892725030827, "grad_norm": 0.05093936249613762, "learning_rate": 0.01, "loss": 2.0318, "step": 29991 }, { "epoch": 3.0819975339087544, "grad_norm": 0.035449884831905365, "learning_rate": 0.01, "loss": 2.0076, "step": 29994 }, { "epoch": 3.0823057953144266, "grad_norm": 0.13781675696372986, "learning_rate": 0.01, "loss": 2.0333, "step": 29997 }, { "epoch": 3.082614056720099, "grad_norm": 0.06310711055994034, "learning_rate": 0.01, "loss": 1.9998, "step": 30000 }, { "epoch": 3.0829223181257706, "grad_norm": 0.059962496161460876, "learning_rate": 0.01, "loss": 2.0033, "step": 30003 }, { "epoch": 3.0832305795314427, "grad_norm": 0.07414010167121887, "learning_rate": 0.01, "loss": 2.0017, "step": 30006 }, { "epoch": 3.0835388409371145, "grad_norm": 0.05147621035575867, "learning_rate": 0.01, "loss": 2.0253, "step": 30009 }, { "epoch": 3.0838471023427867, "grad_norm": 0.08669129014015198, "learning_rate": 0.01, "loss": 2.029, "step": 30012 }, { "epoch": 3.084155363748459, "grad_norm": 0.03791889548301697, "learning_rate": 0.01, "loss": 1.997, "step": 30015 }, { "epoch": 3.0844636251541306, "grad_norm": 0.08246924728155136, "learning_rate": 0.01, "loss": 1.9742, "step": 30018 }, { "epoch": 3.084771886559803, "grad_norm": 0.11454186588525772, "learning_rate": 0.01, "loss": 2.0275, "step": 30021 }, { "epoch": 3.0850801479654746, "grad_norm": 0.07639762759208679, "learning_rate": 0.01, "loss": 2.033, "step": 30024 }, { "epoch": 3.085388409371147, "grad_norm": 0.05283968895673752, "learning_rate": 0.01, "loss": 2.0121, "step": 30027 }, { "epoch": 3.0856966707768185, "grad_norm": 0.041770000010728836, "learning_rate": 0.01, "loss": 2.0196, "step": 30030 }, { "epoch": 3.0860049321824907, "grad_norm": 0.034824103116989136, "learning_rate": 0.01, "loss": 2.0161, "step": 30033 }, { "epoch": 3.086313193588163, "grad_norm": 0.043972861021757126, "learning_rate": 0.01, "loss": 2.0217, "step": 30036 }, { "epoch": 3.0866214549938347, "grad_norm": 0.03452165424823761, "learning_rate": 0.01, "loss": 2.0153, "step": 30039 }, { "epoch": 3.086929716399507, "grad_norm": 0.07175113260746002, "learning_rate": 0.01, "loss": 2.0091, "step": 30042 }, { "epoch": 3.0872379778051786, "grad_norm": 0.0780046209692955, "learning_rate": 0.01, "loss": 2.0127, "step": 30045 }, { "epoch": 3.087546239210851, "grad_norm": 0.06838949769735336, "learning_rate": 0.01, "loss": 2.0108, "step": 30048 }, { "epoch": 3.087854500616523, "grad_norm": 0.114894799888134, "learning_rate": 0.01, "loss": 2.0199, "step": 30051 }, { "epoch": 3.0881627620221948, "grad_norm": 0.10075607150793076, "learning_rate": 0.01, "loss": 2.025, "step": 30054 }, { "epoch": 3.088471023427867, "grad_norm": 0.04926849529147148, "learning_rate": 0.01, "loss": 2.0171, "step": 30057 }, { "epoch": 3.0887792848335387, "grad_norm": 0.04143848270177841, "learning_rate": 0.01, "loss": 2.0275, "step": 30060 }, { "epoch": 3.089087546239211, "grad_norm": 0.04664148762822151, "learning_rate": 0.01, "loss": 2.0227, "step": 30063 }, { "epoch": 3.089395807644883, "grad_norm": 0.04764292761683464, "learning_rate": 0.01, "loss": 2.0408, "step": 30066 }, { "epoch": 3.089704069050555, "grad_norm": 0.06317167729139328, "learning_rate": 0.01, "loss": 2.0177, "step": 30069 }, { "epoch": 3.090012330456227, "grad_norm": 0.07983100414276123, "learning_rate": 0.01, "loss": 2.0285, "step": 30072 }, { "epoch": 3.090320591861899, "grad_norm": 0.03481268137693405, "learning_rate": 0.01, "loss": 2.0441, "step": 30075 }, { "epoch": 3.090628853267571, "grad_norm": 0.042781632393598557, "learning_rate": 0.01, "loss": 2.0278, "step": 30078 }, { "epoch": 3.0909371146732427, "grad_norm": 0.05203581228852272, "learning_rate": 0.01, "loss": 2.0073, "step": 30081 }, { "epoch": 3.091245376078915, "grad_norm": 0.07452182471752167, "learning_rate": 0.01, "loss": 2.031, "step": 30084 }, { "epoch": 3.091553637484587, "grad_norm": 0.05319884046912193, "learning_rate": 0.01, "loss": 2.02, "step": 30087 }, { "epoch": 3.091861898890259, "grad_norm": 0.11648225039243698, "learning_rate": 0.01, "loss": 1.9881, "step": 30090 }, { "epoch": 3.092170160295931, "grad_norm": 0.07870937138795853, "learning_rate": 0.01, "loss": 2.0367, "step": 30093 }, { "epoch": 3.092478421701603, "grad_norm": 0.04574506729841232, "learning_rate": 0.01, "loss": 2.027, "step": 30096 }, { "epoch": 3.092786683107275, "grad_norm": 0.07913927733898163, "learning_rate": 0.01, "loss": 2.0318, "step": 30099 }, { "epoch": 3.0930949445129468, "grad_norm": 0.08093220740556717, "learning_rate": 0.01, "loss": 2.0226, "step": 30102 }, { "epoch": 3.093403205918619, "grad_norm": 0.07089852541685104, "learning_rate": 0.01, "loss": 2.0292, "step": 30105 }, { "epoch": 3.093711467324291, "grad_norm": 0.06293002516031265, "learning_rate": 0.01, "loss": 1.9972, "step": 30108 }, { "epoch": 3.094019728729963, "grad_norm": 0.057953476905822754, "learning_rate": 0.01, "loss": 2.027, "step": 30111 }, { "epoch": 3.094327990135635, "grad_norm": 0.06989496946334839, "learning_rate": 0.01, "loss": 2.0318, "step": 30114 }, { "epoch": 3.094636251541307, "grad_norm": 0.04050515964627266, "learning_rate": 0.01, "loss": 2.0071, "step": 30117 }, { "epoch": 3.094944512946979, "grad_norm": 0.1112913116812706, "learning_rate": 0.01, "loss": 2.0082, "step": 30120 }, { "epoch": 3.0952527743526512, "grad_norm": 0.05135345458984375, "learning_rate": 0.01, "loss": 2.044, "step": 30123 }, { "epoch": 3.095561035758323, "grad_norm": 0.1411667764186859, "learning_rate": 0.01, "loss": 2.0096, "step": 30126 }, { "epoch": 3.095869297163995, "grad_norm": 0.08077210187911987, "learning_rate": 0.01, "loss": 2.0251, "step": 30129 }, { "epoch": 3.096177558569667, "grad_norm": 0.10934364050626755, "learning_rate": 0.01, "loss": 2.0169, "step": 30132 }, { "epoch": 3.096485819975339, "grad_norm": 0.07186675071716309, "learning_rate": 0.01, "loss": 2.0123, "step": 30135 }, { "epoch": 3.0967940813810113, "grad_norm": 0.11918327957391739, "learning_rate": 0.01, "loss": 2.0391, "step": 30138 }, { "epoch": 3.097102342786683, "grad_norm": 0.052199963480234146, "learning_rate": 0.01, "loss": 2.011, "step": 30141 }, { "epoch": 3.0974106041923553, "grad_norm": 0.06796009093523026, "learning_rate": 0.01, "loss": 2.0399, "step": 30144 }, { "epoch": 3.097718865598027, "grad_norm": 0.055949617177248, "learning_rate": 0.01, "loss": 2.0243, "step": 30147 }, { "epoch": 3.098027127003699, "grad_norm": 0.06256923824548721, "learning_rate": 0.01, "loss": 2.0299, "step": 30150 }, { "epoch": 3.098335388409371, "grad_norm": 0.04565596580505371, "learning_rate": 0.01, "loss": 2.0084, "step": 30153 }, { "epoch": 3.098643649815043, "grad_norm": 0.051202937960624695, "learning_rate": 0.01, "loss": 2.0276, "step": 30156 }, { "epoch": 3.0989519112207153, "grad_norm": 0.09557224065065384, "learning_rate": 0.01, "loss": 2.0133, "step": 30159 }, { "epoch": 3.099260172626387, "grad_norm": 0.07491281628608704, "learning_rate": 0.01, "loss": 2.0222, "step": 30162 }, { "epoch": 3.0995684340320593, "grad_norm": 0.06766588240861893, "learning_rate": 0.01, "loss": 2.0165, "step": 30165 }, { "epoch": 3.099876695437731, "grad_norm": 0.036496859043836594, "learning_rate": 0.01, "loss": 2.0135, "step": 30168 }, { "epoch": 3.1001849568434032, "grad_norm": 0.09757262468338013, "learning_rate": 0.01, "loss": 2.0397, "step": 30171 }, { "epoch": 3.1004932182490754, "grad_norm": 0.046625055372714996, "learning_rate": 0.01, "loss": 2.0149, "step": 30174 }, { "epoch": 3.100801479654747, "grad_norm": 0.07655061781406403, "learning_rate": 0.01, "loss": 2.0054, "step": 30177 }, { "epoch": 3.1011097410604194, "grad_norm": 0.04577295854687691, "learning_rate": 0.01, "loss": 2.0192, "step": 30180 }, { "epoch": 3.101418002466091, "grad_norm": 0.04528047516942024, "learning_rate": 0.01, "loss": 2.0181, "step": 30183 }, { "epoch": 3.1017262638717633, "grad_norm": 0.04630058631300926, "learning_rate": 0.01, "loss": 2.0127, "step": 30186 }, { "epoch": 3.102034525277435, "grad_norm": 0.03936396911740303, "learning_rate": 0.01, "loss": 2.0093, "step": 30189 }, { "epoch": 3.1023427866831073, "grad_norm": 0.05879136547446251, "learning_rate": 0.01, "loss": 2.0169, "step": 30192 }, { "epoch": 3.1026510480887795, "grad_norm": 0.061699915677309036, "learning_rate": 0.01, "loss": 2.0418, "step": 30195 }, { "epoch": 3.102959309494451, "grad_norm": 0.05961352214217186, "learning_rate": 0.01, "loss": 1.9985, "step": 30198 }, { "epoch": 3.1032675709001234, "grad_norm": 0.10209197551012039, "learning_rate": 0.01, "loss": 2.0048, "step": 30201 }, { "epoch": 3.103575832305795, "grad_norm": 0.10693737864494324, "learning_rate": 0.01, "loss": 2.0321, "step": 30204 }, { "epoch": 3.1038840937114673, "grad_norm": 0.11183932423591614, "learning_rate": 0.01, "loss": 2.0269, "step": 30207 }, { "epoch": 3.1041923551171395, "grad_norm": 0.07532890886068344, "learning_rate": 0.01, "loss": 2.0284, "step": 30210 }, { "epoch": 3.1045006165228113, "grad_norm": 0.04888713359832764, "learning_rate": 0.01, "loss": 1.9731, "step": 30213 }, { "epoch": 3.1048088779284835, "grad_norm": 0.0533674955368042, "learning_rate": 0.01, "loss": 2.0158, "step": 30216 }, { "epoch": 3.1051171393341552, "grad_norm": 0.044245727360248566, "learning_rate": 0.01, "loss": 2.0006, "step": 30219 }, { "epoch": 3.1054254007398274, "grad_norm": 0.032582368701696396, "learning_rate": 0.01, "loss": 2.0314, "step": 30222 }, { "epoch": 3.105733662145499, "grad_norm": 0.10102632641792297, "learning_rate": 0.01, "loss": 2.0094, "step": 30225 }, { "epoch": 3.1060419235511714, "grad_norm": 0.03793931379914284, "learning_rate": 0.01, "loss": 2.0356, "step": 30228 }, { "epoch": 3.1063501849568436, "grad_norm": 0.04896242544054985, "learning_rate": 0.01, "loss": 2.0167, "step": 30231 }, { "epoch": 3.1066584463625153, "grad_norm": 0.057475607842206955, "learning_rate": 0.01, "loss": 1.9813, "step": 30234 }, { "epoch": 3.1069667077681875, "grad_norm": 0.037703339010477066, "learning_rate": 0.01, "loss": 2.0002, "step": 30237 }, { "epoch": 3.1072749691738593, "grad_norm": 0.04800937697291374, "learning_rate": 0.01, "loss": 2.043, "step": 30240 }, { "epoch": 3.1075832305795315, "grad_norm": 0.042761534452438354, "learning_rate": 0.01, "loss": 2.0208, "step": 30243 }, { "epoch": 3.1078914919852036, "grad_norm": 0.0864332839846611, "learning_rate": 0.01, "loss": 2.0229, "step": 30246 }, { "epoch": 3.1081997533908754, "grad_norm": 0.10482830554246902, "learning_rate": 0.01, "loss": 2.0601, "step": 30249 }, { "epoch": 3.1085080147965476, "grad_norm": 0.10628984868526459, "learning_rate": 0.01, "loss": 2.0078, "step": 30252 }, { "epoch": 3.1088162762022193, "grad_norm": 0.04853438585996628, "learning_rate": 0.01, "loss": 1.9999, "step": 30255 }, { "epoch": 3.1091245376078915, "grad_norm": 0.05274122208356857, "learning_rate": 0.01, "loss": 1.9926, "step": 30258 }, { "epoch": 3.1094327990135637, "grad_norm": 0.03757349029183388, "learning_rate": 0.01, "loss": 2.003, "step": 30261 }, { "epoch": 3.1097410604192355, "grad_norm": 0.04505928233265877, "learning_rate": 0.01, "loss": 2.0126, "step": 30264 }, { "epoch": 3.1100493218249077, "grad_norm": 0.08804783225059509, "learning_rate": 0.01, "loss": 2.0285, "step": 30267 }, { "epoch": 3.1103575832305794, "grad_norm": 0.11545568704605103, "learning_rate": 0.01, "loss": 2.0105, "step": 30270 }, { "epoch": 3.1106658446362516, "grad_norm": 0.04955466091632843, "learning_rate": 0.01, "loss": 2.048, "step": 30273 }, { "epoch": 3.1109741060419234, "grad_norm": 0.04930401220917702, "learning_rate": 0.01, "loss": 2.0223, "step": 30276 }, { "epoch": 3.1112823674475956, "grad_norm": 0.05701540783047676, "learning_rate": 0.01, "loss": 2.046, "step": 30279 }, { "epoch": 3.1115906288532678, "grad_norm": 0.07442028075456619, "learning_rate": 0.01, "loss": 2.0085, "step": 30282 }, { "epoch": 3.1118988902589395, "grad_norm": 0.06538541615009308, "learning_rate": 0.01, "loss": 2.0194, "step": 30285 }, { "epoch": 3.1122071516646117, "grad_norm": 0.0826522633433342, "learning_rate": 0.01, "loss": 2.0267, "step": 30288 }, { "epoch": 3.1125154130702835, "grad_norm": 0.1353752762079239, "learning_rate": 0.01, "loss": 2.0089, "step": 30291 }, { "epoch": 3.1128236744759556, "grad_norm": 0.06406208872795105, "learning_rate": 0.01, "loss": 2.0174, "step": 30294 }, { "epoch": 3.1131319358816274, "grad_norm": 0.036653418093919754, "learning_rate": 0.01, "loss": 1.9951, "step": 30297 }, { "epoch": 3.1134401972872996, "grad_norm": 0.0839834213256836, "learning_rate": 0.01, "loss": 2.0213, "step": 30300 }, { "epoch": 3.113748458692972, "grad_norm": 0.04629015177488327, "learning_rate": 0.01, "loss": 2.0488, "step": 30303 }, { "epoch": 3.1140567200986435, "grad_norm": 0.08630986511707306, "learning_rate": 0.01, "loss": 2.0271, "step": 30306 }, { "epoch": 3.1143649815043157, "grad_norm": 0.07894504070281982, "learning_rate": 0.01, "loss": 2.0128, "step": 30309 }, { "epoch": 3.1146732429099875, "grad_norm": 0.09203556925058365, "learning_rate": 0.01, "loss": 2.0094, "step": 30312 }, { "epoch": 3.1149815043156597, "grad_norm": 0.061055563390254974, "learning_rate": 0.01, "loss": 2.0199, "step": 30315 }, { "epoch": 3.115289765721332, "grad_norm": 0.0711871087551117, "learning_rate": 0.01, "loss": 2.0091, "step": 30318 }, { "epoch": 3.1155980271270036, "grad_norm": 0.07315775007009506, "learning_rate": 0.01, "loss": 2.0209, "step": 30321 }, { "epoch": 3.115906288532676, "grad_norm": 0.1050182655453682, "learning_rate": 0.01, "loss": 2.0086, "step": 30324 }, { "epoch": 3.1162145499383476, "grad_norm": 0.04959526285529137, "learning_rate": 0.01, "loss": 1.9827, "step": 30327 }, { "epoch": 3.1165228113440198, "grad_norm": 0.061066534370183945, "learning_rate": 0.01, "loss": 2.0258, "step": 30330 }, { "epoch": 3.116831072749692, "grad_norm": 0.057984329760074615, "learning_rate": 0.01, "loss": 2.0119, "step": 30333 }, { "epoch": 3.1171393341553637, "grad_norm": 0.07337040454149246, "learning_rate": 0.01, "loss": 2.0069, "step": 30336 }, { "epoch": 3.117447595561036, "grad_norm": 0.07801029831171036, "learning_rate": 0.01, "loss": 2.0172, "step": 30339 }, { "epoch": 3.1177558569667077, "grad_norm": 0.06625111401081085, "learning_rate": 0.01, "loss": 2.022, "step": 30342 }, { "epoch": 3.11806411837238, "grad_norm": 0.11416434496641159, "learning_rate": 0.01, "loss": 2.0304, "step": 30345 }, { "epoch": 3.1183723797780516, "grad_norm": 0.05046262592077255, "learning_rate": 0.01, "loss": 2.0064, "step": 30348 }, { "epoch": 3.118680641183724, "grad_norm": 0.030621282756328583, "learning_rate": 0.01, "loss": 2.0085, "step": 30351 }, { "epoch": 3.118988902589396, "grad_norm": 0.03274908661842346, "learning_rate": 0.01, "loss": 2.0164, "step": 30354 }, { "epoch": 3.1192971639950677, "grad_norm": 0.03673490136861801, "learning_rate": 0.01, "loss": 2.0383, "step": 30357 }, { "epoch": 3.11960542540074, "grad_norm": 0.07161369919776917, "learning_rate": 0.01, "loss": 2.0122, "step": 30360 }, { "epoch": 3.1199136868064117, "grad_norm": 0.04244636744260788, "learning_rate": 0.01, "loss": 1.9901, "step": 30363 }, { "epoch": 3.120221948212084, "grad_norm": 0.05357150360941887, "learning_rate": 0.01, "loss": 1.9994, "step": 30366 }, { "epoch": 3.120530209617756, "grad_norm": 0.06689538061618805, "learning_rate": 0.01, "loss": 2.008, "step": 30369 }, { "epoch": 3.120838471023428, "grad_norm": 0.08160898089408875, "learning_rate": 0.01, "loss": 2.0189, "step": 30372 }, { "epoch": 3.1211467324291, "grad_norm": 0.04037999361753464, "learning_rate": 0.01, "loss": 1.993, "step": 30375 }, { "epoch": 3.1214549938347718, "grad_norm": 0.09855981171131134, "learning_rate": 0.01, "loss": 2.0157, "step": 30378 }, { "epoch": 3.121763255240444, "grad_norm": 0.04645252600312233, "learning_rate": 0.01, "loss": 2.0211, "step": 30381 }, { "epoch": 3.1220715166461157, "grad_norm": 0.10605467110872269, "learning_rate": 0.01, "loss": 2.015, "step": 30384 }, { "epoch": 3.122379778051788, "grad_norm": 0.11321547627449036, "learning_rate": 0.01, "loss": 2.0193, "step": 30387 }, { "epoch": 3.12268803945746, "grad_norm": 0.07283324748277664, "learning_rate": 0.01, "loss": 2.0327, "step": 30390 }, { "epoch": 3.122996300863132, "grad_norm": 0.07217562943696976, "learning_rate": 0.01, "loss": 2.0079, "step": 30393 }, { "epoch": 3.123304562268804, "grad_norm": 0.036078888922929764, "learning_rate": 0.01, "loss": 2.0211, "step": 30396 }, { "epoch": 3.123612823674476, "grad_norm": 0.03730477765202522, "learning_rate": 0.01, "loss": 2.0077, "step": 30399 }, { "epoch": 3.123921085080148, "grad_norm": 0.04707048460841179, "learning_rate": 0.01, "loss": 2.0204, "step": 30402 }, { "epoch": 3.12422934648582, "grad_norm": 0.0571308508515358, "learning_rate": 0.01, "loss": 2.0054, "step": 30405 }, { "epoch": 3.124537607891492, "grad_norm": 0.05167640373110771, "learning_rate": 0.01, "loss": 2.0134, "step": 30408 }, { "epoch": 3.124845869297164, "grad_norm": 0.06759858876466751, "learning_rate": 0.01, "loss": 2.0286, "step": 30411 }, { "epoch": 3.125154130702836, "grad_norm": 0.031637318432331085, "learning_rate": 0.01, "loss": 1.9848, "step": 30414 }, { "epoch": 3.125462392108508, "grad_norm": 0.0991082713007927, "learning_rate": 0.01, "loss": 2.0277, "step": 30417 }, { "epoch": 3.12577065351418, "grad_norm": 0.06524378806352615, "learning_rate": 0.01, "loss": 1.9995, "step": 30420 }, { "epoch": 3.126078914919852, "grad_norm": 0.05751586705446243, "learning_rate": 0.01, "loss": 2.012, "step": 30423 }, { "epoch": 3.126387176325524, "grad_norm": 0.03776116296648979, "learning_rate": 0.01, "loss": 2.0291, "step": 30426 }, { "epoch": 3.126695437731196, "grad_norm": 0.0619078204035759, "learning_rate": 0.01, "loss": 1.9884, "step": 30429 }, { "epoch": 3.127003699136868, "grad_norm": 0.0511760450899601, "learning_rate": 0.01, "loss": 2.0142, "step": 30432 }, { "epoch": 3.12731196054254, "grad_norm": 0.053964611142873764, "learning_rate": 0.01, "loss": 2.0361, "step": 30435 }, { "epoch": 3.127620221948212, "grad_norm": 0.11773833632469177, "learning_rate": 0.01, "loss": 2.0298, "step": 30438 }, { "epoch": 3.1279284833538843, "grad_norm": 0.04505350813269615, "learning_rate": 0.01, "loss": 2.0162, "step": 30441 }, { "epoch": 3.128236744759556, "grad_norm": 0.10077274590730667, "learning_rate": 0.01, "loss": 2.0101, "step": 30444 }, { "epoch": 3.1285450061652282, "grad_norm": 0.05291616916656494, "learning_rate": 0.01, "loss": 2.0148, "step": 30447 }, { "epoch": 3.1288532675709, "grad_norm": 0.049340371042490005, "learning_rate": 0.01, "loss": 2.0101, "step": 30450 }, { "epoch": 3.129161528976572, "grad_norm": 0.06910324841737747, "learning_rate": 0.01, "loss": 2.008, "step": 30453 }, { "epoch": 3.1294697903822444, "grad_norm": 0.07307056337594986, "learning_rate": 0.01, "loss": 1.9933, "step": 30456 }, { "epoch": 3.129778051787916, "grad_norm": 0.06828980147838593, "learning_rate": 0.01, "loss": 2.0343, "step": 30459 }, { "epoch": 3.1300863131935883, "grad_norm": 0.0480424240231514, "learning_rate": 0.01, "loss": 2.0207, "step": 30462 }, { "epoch": 3.13039457459926, "grad_norm": 0.10072044283151627, "learning_rate": 0.01, "loss": 2.0244, "step": 30465 }, { "epoch": 3.1307028360049323, "grad_norm": 0.04987982660531998, "learning_rate": 0.01, "loss": 2.0056, "step": 30468 }, { "epoch": 3.131011097410604, "grad_norm": 0.10036738961935043, "learning_rate": 0.01, "loss": 2.0069, "step": 30471 }, { "epoch": 3.131319358816276, "grad_norm": 0.06445086747407913, "learning_rate": 0.01, "loss": 2.0196, "step": 30474 }, { "epoch": 3.1316276202219484, "grad_norm": 0.10858535021543503, "learning_rate": 0.01, "loss": 2.0159, "step": 30477 }, { "epoch": 3.13193588162762, "grad_norm": 0.041025932878255844, "learning_rate": 0.01, "loss": 2.0156, "step": 30480 }, { "epoch": 3.1322441430332923, "grad_norm": 0.05287109315395355, "learning_rate": 0.01, "loss": 2.0143, "step": 30483 }, { "epoch": 3.132552404438964, "grad_norm": 0.09659206867218018, "learning_rate": 0.01, "loss": 1.9991, "step": 30486 }, { "epoch": 3.1328606658446363, "grad_norm": 0.07235360145568848, "learning_rate": 0.01, "loss": 2.0521, "step": 30489 }, { "epoch": 3.133168927250308, "grad_norm": 0.042338863015174866, "learning_rate": 0.01, "loss": 2.0403, "step": 30492 }, { "epoch": 3.1334771886559802, "grad_norm": 0.045676395297050476, "learning_rate": 0.01, "loss": 1.9921, "step": 30495 }, { "epoch": 3.1337854500616524, "grad_norm": 0.04957246780395508, "learning_rate": 0.01, "loss": 2.0095, "step": 30498 }, { "epoch": 3.134093711467324, "grad_norm": 0.047321684658527374, "learning_rate": 0.01, "loss": 2.025, "step": 30501 }, { "epoch": 3.1344019728729964, "grad_norm": 0.09219278395175934, "learning_rate": 0.01, "loss": 2.0326, "step": 30504 }, { "epoch": 3.134710234278668, "grad_norm": 0.07666533440351486, "learning_rate": 0.01, "loss": 2.0132, "step": 30507 }, { "epoch": 3.1350184956843403, "grad_norm": 0.06832748651504517, "learning_rate": 0.01, "loss": 2.0074, "step": 30510 }, { "epoch": 3.1353267570900125, "grad_norm": 0.05527606979012489, "learning_rate": 0.01, "loss": 1.9941, "step": 30513 }, { "epoch": 3.1356350184956843, "grad_norm": 0.09272732585668564, "learning_rate": 0.01, "loss": 1.9942, "step": 30516 }, { "epoch": 3.1359432799013565, "grad_norm": 0.02908925898373127, "learning_rate": 0.01, "loss": 2.0173, "step": 30519 }, { "epoch": 3.136251541307028, "grad_norm": 0.04611453413963318, "learning_rate": 0.01, "loss": 2.0332, "step": 30522 }, { "epoch": 3.1365598027127004, "grad_norm": 0.05230382829904556, "learning_rate": 0.01, "loss": 2.0226, "step": 30525 }, { "epoch": 3.1368680641183726, "grad_norm": 0.09377764165401459, "learning_rate": 0.01, "loss": 2.0395, "step": 30528 }, { "epoch": 3.1371763255240444, "grad_norm": 0.049708276987075806, "learning_rate": 0.01, "loss": 1.996, "step": 30531 }, { "epoch": 3.1374845869297165, "grad_norm": 0.05086242035031319, "learning_rate": 0.01, "loss": 1.9855, "step": 30534 }, { "epoch": 3.1377928483353883, "grad_norm": 0.05085150897502899, "learning_rate": 0.01, "loss": 1.9968, "step": 30537 }, { "epoch": 3.1381011097410605, "grad_norm": 0.05230359733104706, "learning_rate": 0.01, "loss": 2.0079, "step": 30540 }, { "epoch": 3.1384093711467322, "grad_norm": 0.07910077273845673, "learning_rate": 0.01, "loss": 2.0075, "step": 30543 }, { "epoch": 3.1387176325524044, "grad_norm": 0.044358085840940475, "learning_rate": 0.01, "loss": 2.0236, "step": 30546 }, { "epoch": 3.1390258939580766, "grad_norm": 0.07269710302352905, "learning_rate": 0.01, "loss": 1.9995, "step": 30549 }, { "epoch": 3.1393341553637484, "grad_norm": 0.09917914122343063, "learning_rate": 0.01, "loss": 2.0233, "step": 30552 }, { "epoch": 3.1396424167694206, "grad_norm": 0.0755099207162857, "learning_rate": 0.01, "loss": 2.0113, "step": 30555 }, { "epoch": 3.1399506781750923, "grad_norm": 0.07911227643489838, "learning_rate": 0.01, "loss": 1.9751, "step": 30558 }, { "epoch": 3.1402589395807645, "grad_norm": 0.04883533716201782, "learning_rate": 0.01, "loss": 2.0247, "step": 30561 }, { "epoch": 3.1405672009864363, "grad_norm": 0.0375591404736042, "learning_rate": 0.01, "loss": 2.004, "step": 30564 }, { "epoch": 3.1408754623921085, "grad_norm": 0.08654747903347015, "learning_rate": 0.01, "loss": 2.0315, "step": 30567 }, { "epoch": 3.1411837237977807, "grad_norm": 0.07025197148323059, "learning_rate": 0.01, "loss": 2.0334, "step": 30570 }, { "epoch": 3.1414919852034524, "grad_norm": 0.11750215291976929, "learning_rate": 0.01, "loss": 1.9902, "step": 30573 }, { "epoch": 3.1418002466091246, "grad_norm": 0.037444472312927246, "learning_rate": 0.01, "loss": 2.0272, "step": 30576 }, { "epoch": 3.1421085080147964, "grad_norm": 0.044617678970098495, "learning_rate": 0.01, "loss": 2.0304, "step": 30579 }, { "epoch": 3.1424167694204685, "grad_norm": 0.06604386866092682, "learning_rate": 0.01, "loss": 2.0153, "step": 30582 }, { "epoch": 3.1427250308261407, "grad_norm": 0.09958125650882721, "learning_rate": 0.01, "loss": 2.0217, "step": 30585 }, { "epoch": 3.1430332922318125, "grad_norm": 0.05710573121905327, "learning_rate": 0.01, "loss": 2.0199, "step": 30588 }, { "epoch": 3.1433415536374847, "grad_norm": 0.05984263867139816, "learning_rate": 0.01, "loss": 2.0091, "step": 30591 }, { "epoch": 3.1436498150431564, "grad_norm": 0.04073350876569748, "learning_rate": 0.01, "loss": 2.032, "step": 30594 }, { "epoch": 3.1439580764488286, "grad_norm": 0.07050776481628418, "learning_rate": 0.01, "loss": 1.9977, "step": 30597 }, { "epoch": 3.144266337854501, "grad_norm": 0.03400518372654915, "learning_rate": 0.01, "loss": 2.0404, "step": 30600 }, { "epoch": 3.1445745992601726, "grad_norm": 0.06751801073551178, "learning_rate": 0.01, "loss": 2.0138, "step": 30603 }, { "epoch": 3.1448828606658448, "grad_norm": 0.06015434488654137, "learning_rate": 0.01, "loss": 1.9956, "step": 30606 }, { "epoch": 3.1451911220715165, "grad_norm": 0.11231853067874908, "learning_rate": 0.01, "loss": 2.0367, "step": 30609 }, { "epoch": 3.1454993834771887, "grad_norm": 0.047932934015989304, "learning_rate": 0.01, "loss": 2.0133, "step": 30612 }, { "epoch": 3.1458076448828605, "grad_norm": 0.04414699971675873, "learning_rate": 0.01, "loss": 2.0155, "step": 30615 }, { "epoch": 3.1461159062885327, "grad_norm": 0.06486550718545914, "learning_rate": 0.01, "loss": 2.0251, "step": 30618 }, { "epoch": 3.146424167694205, "grad_norm": 0.056324832141399384, "learning_rate": 0.01, "loss": 2.0137, "step": 30621 }, { "epoch": 3.1467324290998766, "grad_norm": 0.030867785215377808, "learning_rate": 0.01, "loss": 2.0339, "step": 30624 }, { "epoch": 3.147040690505549, "grad_norm": 0.050489556044340134, "learning_rate": 0.01, "loss": 2.0233, "step": 30627 }, { "epoch": 3.1473489519112205, "grad_norm": 0.04749634861946106, "learning_rate": 0.01, "loss": 2.0061, "step": 30630 }, { "epoch": 3.1476572133168927, "grad_norm": 0.05312662571668625, "learning_rate": 0.01, "loss": 1.9881, "step": 30633 }, { "epoch": 3.147965474722565, "grad_norm": 0.1234770268201828, "learning_rate": 0.01, "loss": 2.0119, "step": 30636 }, { "epoch": 3.1482737361282367, "grad_norm": 0.1423310786485672, "learning_rate": 0.01, "loss": 1.99, "step": 30639 }, { "epoch": 3.148581997533909, "grad_norm": 0.08930431306362152, "learning_rate": 0.01, "loss": 2.023, "step": 30642 }, { "epoch": 3.1488902589395806, "grad_norm": 0.03335024416446686, "learning_rate": 0.01, "loss": 2.0072, "step": 30645 }, { "epoch": 3.149198520345253, "grad_norm": 0.07785200327634811, "learning_rate": 0.01, "loss": 2.0083, "step": 30648 }, { "epoch": 3.1495067817509246, "grad_norm": 0.047731272876262665, "learning_rate": 0.01, "loss": 1.9999, "step": 30651 }, { "epoch": 3.1498150431565968, "grad_norm": 0.10476487874984741, "learning_rate": 0.01, "loss": 2.0374, "step": 30654 }, { "epoch": 3.150123304562269, "grad_norm": 0.05200812220573425, "learning_rate": 0.01, "loss": 2.0204, "step": 30657 }, { "epoch": 3.1504315659679407, "grad_norm": 0.06658156216144562, "learning_rate": 0.01, "loss": 2.0331, "step": 30660 }, { "epoch": 3.150739827373613, "grad_norm": 0.06515184789896011, "learning_rate": 0.01, "loss": 2.0248, "step": 30663 }, { "epoch": 3.1510480887792847, "grad_norm": 0.03604321926832199, "learning_rate": 0.01, "loss": 2.0279, "step": 30666 }, { "epoch": 3.151356350184957, "grad_norm": 0.10496728122234344, "learning_rate": 0.01, "loss": 2.016, "step": 30669 }, { "epoch": 3.151664611590629, "grad_norm": 0.05922207236289978, "learning_rate": 0.01, "loss": 1.9988, "step": 30672 }, { "epoch": 3.151972872996301, "grad_norm": 0.03718853369355202, "learning_rate": 0.01, "loss": 2.0313, "step": 30675 }, { "epoch": 3.152281134401973, "grad_norm": 0.04691898077726364, "learning_rate": 0.01, "loss": 1.9755, "step": 30678 }, { "epoch": 3.1525893958076447, "grad_norm": 0.048930030316114426, "learning_rate": 0.01, "loss": 2.0079, "step": 30681 }, { "epoch": 3.152897657213317, "grad_norm": 0.028419634327292442, "learning_rate": 0.01, "loss": 1.9928, "step": 30684 }, { "epoch": 3.1532059186189887, "grad_norm": 0.05914349481463432, "learning_rate": 0.01, "loss": 2.004, "step": 30687 }, { "epoch": 3.153514180024661, "grad_norm": 0.05946047231554985, "learning_rate": 0.01, "loss": 2.025, "step": 30690 }, { "epoch": 3.153822441430333, "grad_norm": 0.06951475888490677, "learning_rate": 0.01, "loss": 2.0341, "step": 30693 }, { "epoch": 3.154130702836005, "grad_norm": 0.055885329842567444, "learning_rate": 0.01, "loss": 2.0335, "step": 30696 }, { "epoch": 3.154438964241677, "grad_norm": 0.043436676263809204, "learning_rate": 0.01, "loss": 1.9812, "step": 30699 }, { "epoch": 3.1547472256473488, "grad_norm": 0.04978411644697189, "learning_rate": 0.01, "loss": 2.0117, "step": 30702 }, { "epoch": 3.155055487053021, "grad_norm": 0.11427465081214905, "learning_rate": 0.01, "loss": 2.0179, "step": 30705 }, { "epoch": 3.155363748458693, "grad_norm": 0.045963071286678314, "learning_rate": 0.01, "loss": 1.9926, "step": 30708 }, { "epoch": 3.155672009864365, "grad_norm": 0.03811359032988548, "learning_rate": 0.01, "loss": 2.0241, "step": 30711 }, { "epoch": 3.155980271270037, "grad_norm": 0.05763382837176323, "learning_rate": 0.01, "loss": 2.0158, "step": 30714 }, { "epoch": 3.156288532675709, "grad_norm": 0.045626237988471985, "learning_rate": 0.01, "loss": 2.0033, "step": 30717 }, { "epoch": 3.156596794081381, "grad_norm": 0.037544216960668564, "learning_rate": 0.01, "loss": 1.9964, "step": 30720 }, { "epoch": 3.1569050554870532, "grad_norm": 0.14238816499710083, "learning_rate": 0.01, "loss": 2.0342, "step": 30723 }, { "epoch": 3.157213316892725, "grad_norm": 0.048318054527044296, "learning_rate": 0.01, "loss": 2.0181, "step": 30726 }, { "epoch": 3.157521578298397, "grad_norm": 0.041052673012018204, "learning_rate": 0.01, "loss": 1.9813, "step": 30729 }, { "epoch": 3.157829839704069, "grad_norm": 0.0370815135538578, "learning_rate": 0.01, "loss": 2.0042, "step": 30732 }, { "epoch": 3.158138101109741, "grad_norm": 0.07156988233327866, "learning_rate": 0.01, "loss": 2.0374, "step": 30735 }, { "epoch": 3.158446362515413, "grad_norm": 0.0757046490907669, "learning_rate": 0.01, "loss": 2.0147, "step": 30738 }, { "epoch": 3.158754623921085, "grad_norm": 0.0674174353480339, "learning_rate": 0.01, "loss": 1.9951, "step": 30741 }, { "epoch": 3.1590628853267573, "grad_norm": 0.05094176158308983, "learning_rate": 0.01, "loss": 2.0196, "step": 30744 }, { "epoch": 3.159371146732429, "grad_norm": 0.04377339780330658, "learning_rate": 0.01, "loss": 2.0129, "step": 30747 }, { "epoch": 3.159679408138101, "grad_norm": 0.04788428172469139, "learning_rate": 0.01, "loss": 2.032, "step": 30750 }, { "epoch": 3.159987669543773, "grad_norm": 0.08424562215805054, "learning_rate": 0.01, "loss": 1.9851, "step": 30753 }, { "epoch": 3.160295930949445, "grad_norm": 0.0742245689034462, "learning_rate": 0.01, "loss": 1.9942, "step": 30756 }, { "epoch": 3.160604192355117, "grad_norm": 0.127692312002182, "learning_rate": 0.01, "loss": 2.0294, "step": 30759 }, { "epoch": 3.160912453760789, "grad_norm": 0.06112000718712807, "learning_rate": 0.01, "loss": 2.0087, "step": 30762 }, { "epoch": 3.1612207151664613, "grad_norm": 0.04676929488778114, "learning_rate": 0.01, "loss": 2.0117, "step": 30765 }, { "epoch": 3.161528976572133, "grad_norm": 0.07944846153259277, "learning_rate": 0.01, "loss": 2.0239, "step": 30768 }, { "epoch": 3.1618372379778052, "grad_norm": 0.05377965793013573, "learning_rate": 0.01, "loss": 2.0424, "step": 30771 }, { "epoch": 3.162145499383477, "grad_norm": 0.04961777105927467, "learning_rate": 0.01, "loss": 1.9989, "step": 30774 }, { "epoch": 3.162453760789149, "grad_norm": 0.043640993535518646, "learning_rate": 0.01, "loss": 1.9938, "step": 30777 }, { "epoch": 3.1627620221948214, "grad_norm": 0.08145361393690109, "learning_rate": 0.01, "loss": 2.0253, "step": 30780 }, { "epoch": 3.163070283600493, "grad_norm": 0.03488341346383095, "learning_rate": 0.01, "loss": 1.9797, "step": 30783 }, { "epoch": 3.1633785450061653, "grad_norm": 0.054735999554395676, "learning_rate": 0.01, "loss": 2.0142, "step": 30786 }, { "epoch": 3.163686806411837, "grad_norm": 0.05064836144447327, "learning_rate": 0.01, "loss": 2.0255, "step": 30789 }, { "epoch": 3.1639950678175093, "grad_norm": 0.05693964287638664, "learning_rate": 0.01, "loss": 2.0073, "step": 30792 }, { "epoch": 3.1643033292231815, "grad_norm": 0.08762123435735703, "learning_rate": 0.01, "loss": 2.0049, "step": 30795 }, { "epoch": 3.164611590628853, "grad_norm": 0.08652741461992264, "learning_rate": 0.01, "loss": 2.0021, "step": 30798 }, { "epoch": 3.1649198520345254, "grad_norm": 0.15153749287128448, "learning_rate": 0.01, "loss": 2.0326, "step": 30801 }, { "epoch": 3.165228113440197, "grad_norm": 0.09822986274957657, "learning_rate": 0.01, "loss": 2.0232, "step": 30804 }, { "epoch": 3.1655363748458694, "grad_norm": 0.07334254682064056, "learning_rate": 0.01, "loss": 2.0176, "step": 30807 }, { "epoch": 3.165844636251541, "grad_norm": 0.042224787175655365, "learning_rate": 0.01, "loss": 2.0277, "step": 30810 }, { "epoch": 3.1661528976572133, "grad_norm": 0.047276921570301056, "learning_rate": 0.01, "loss": 2.0014, "step": 30813 }, { "epoch": 3.1664611590628855, "grad_norm": 0.112834133207798, "learning_rate": 0.01, "loss": 1.998, "step": 30816 }, { "epoch": 3.1667694204685573, "grad_norm": 0.0851617380976677, "learning_rate": 0.01, "loss": 2.013, "step": 30819 }, { "epoch": 3.1670776818742294, "grad_norm": 0.046194661408662796, "learning_rate": 0.01, "loss": 2.0029, "step": 30822 }, { "epoch": 3.167385943279901, "grad_norm": 0.056417178362607956, "learning_rate": 0.01, "loss": 2.0171, "step": 30825 }, { "epoch": 3.1676942046855734, "grad_norm": 0.06759685277938843, "learning_rate": 0.01, "loss": 2.0281, "step": 30828 }, { "epoch": 3.1680024660912456, "grad_norm": 0.08961043506860733, "learning_rate": 0.01, "loss": 2.0222, "step": 30831 }, { "epoch": 3.1683107274969173, "grad_norm": 0.03684352710843086, "learning_rate": 0.01, "loss": 2.0037, "step": 30834 }, { "epoch": 3.1686189889025895, "grad_norm": 0.1388491988182068, "learning_rate": 0.01, "loss": 2.0304, "step": 30837 }, { "epoch": 3.1689272503082613, "grad_norm": 0.03769170120358467, "learning_rate": 0.01, "loss": 1.9998, "step": 30840 }, { "epoch": 3.1692355117139335, "grad_norm": 0.040609996765851974, "learning_rate": 0.01, "loss": 2.0208, "step": 30843 }, { "epoch": 3.1695437731196052, "grad_norm": 0.04916587471961975, "learning_rate": 0.01, "loss": 2.0249, "step": 30846 }, { "epoch": 3.1698520345252774, "grad_norm": 0.042920999228954315, "learning_rate": 0.01, "loss": 2.0078, "step": 30849 }, { "epoch": 3.1701602959309496, "grad_norm": 0.05777138099074364, "learning_rate": 0.01, "loss": 2.019, "step": 30852 }, { "epoch": 3.1704685573366214, "grad_norm": 0.06128811836242676, "learning_rate": 0.01, "loss": 2.0123, "step": 30855 }, { "epoch": 3.1707768187422936, "grad_norm": 0.08042391389608383, "learning_rate": 0.01, "loss": 2.0123, "step": 30858 }, { "epoch": 3.1710850801479653, "grad_norm": 0.036383189260959625, "learning_rate": 0.01, "loss": 2.0011, "step": 30861 }, { "epoch": 3.1713933415536375, "grad_norm": 0.05068094655871391, "learning_rate": 0.01, "loss": 1.9922, "step": 30864 }, { "epoch": 3.1717016029593097, "grad_norm": 0.09005576372146606, "learning_rate": 0.01, "loss": 2.048, "step": 30867 }, { "epoch": 3.1720098643649814, "grad_norm": 0.0589495450258255, "learning_rate": 0.01, "loss": 2.0227, "step": 30870 }, { "epoch": 3.1723181257706536, "grad_norm": 0.04160517826676369, "learning_rate": 0.01, "loss": 1.9987, "step": 30873 }, { "epoch": 3.1726263871763254, "grad_norm": 0.044718582183122635, "learning_rate": 0.01, "loss": 1.9895, "step": 30876 }, { "epoch": 3.1729346485819976, "grad_norm": 0.09011929482221603, "learning_rate": 0.01, "loss": 2.0076, "step": 30879 }, { "epoch": 3.1732429099876693, "grad_norm": 0.0597953200340271, "learning_rate": 0.01, "loss": 1.9974, "step": 30882 }, { "epoch": 3.1735511713933415, "grad_norm": 0.06141204759478569, "learning_rate": 0.01, "loss": 2.0141, "step": 30885 }, { "epoch": 3.1738594327990137, "grad_norm": 0.09013784676790237, "learning_rate": 0.01, "loss": 2.0159, "step": 30888 }, { "epoch": 3.1741676942046855, "grad_norm": 0.08281320333480835, "learning_rate": 0.01, "loss": 2.0239, "step": 30891 }, { "epoch": 3.1744759556103577, "grad_norm": 0.09314266592264175, "learning_rate": 0.01, "loss": 2.0224, "step": 30894 }, { "epoch": 3.1747842170160294, "grad_norm": 0.04410851374268532, "learning_rate": 0.01, "loss": 2.0152, "step": 30897 }, { "epoch": 3.1750924784217016, "grad_norm": 0.12074366211891174, "learning_rate": 0.01, "loss": 2.0385, "step": 30900 }, { "epoch": 3.175400739827374, "grad_norm": 0.06861037015914917, "learning_rate": 0.01, "loss": 2.0024, "step": 30903 }, { "epoch": 3.1757090012330456, "grad_norm": 0.038134749978780746, "learning_rate": 0.01, "loss": 1.9987, "step": 30906 }, { "epoch": 3.1760172626387178, "grad_norm": 0.11108820140361786, "learning_rate": 0.01, "loss": 2.0149, "step": 30909 }, { "epoch": 3.1763255240443895, "grad_norm": 0.047687213867902756, "learning_rate": 0.01, "loss": 2.0063, "step": 30912 }, { "epoch": 3.1766337854500617, "grad_norm": 0.05983440950512886, "learning_rate": 0.01, "loss": 2.0099, "step": 30915 }, { "epoch": 3.176942046855734, "grad_norm": 0.038930587470531464, "learning_rate": 0.01, "loss": 2.0122, "step": 30918 }, { "epoch": 3.1772503082614056, "grad_norm": 0.08851155638694763, "learning_rate": 0.01, "loss": 2.0077, "step": 30921 }, { "epoch": 3.177558569667078, "grad_norm": 0.08430106192827225, "learning_rate": 0.01, "loss": 2.0082, "step": 30924 }, { "epoch": 3.1778668310727496, "grad_norm": 0.04469950869679451, "learning_rate": 0.01, "loss": 2.0138, "step": 30927 }, { "epoch": 3.178175092478422, "grad_norm": 0.06740089505910873, "learning_rate": 0.01, "loss": 2.0051, "step": 30930 }, { "epoch": 3.1784833538840935, "grad_norm": 0.06175517663359642, "learning_rate": 0.01, "loss": 2.0215, "step": 30933 }, { "epoch": 3.1787916152897657, "grad_norm": 0.047650065273046494, "learning_rate": 0.01, "loss": 2.0246, "step": 30936 }, { "epoch": 3.179099876695438, "grad_norm": 0.07261566072702408, "learning_rate": 0.01, "loss": 2.0107, "step": 30939 }, { "epoch": 3.1794081381011097, "grad_norm": 0.1270940601825714, "learning_rate": 0.01, "loss": 2.0208, "step": 30942 }, { "epoch": 3.179716399506782, "grad_norm": 0.054443880915641785, "learning_rate": 0.01, "loss": 2.0272, "step": 30945 }, { "epoch": 3.1800246609124536, "grad_norm": 0.06243740767240524, "learning_rate": 0.01, "loss": 2.0013, "step": 30948 }, { "epoch": 3.180332922318126, "grad_norm": 0.04665446653962135, "learning_rate": 0.01, "loss": 2.0269, "step": 30951 }, { "epoch": 3.1806411837237976, "grad_norm": 0.0470532663166523, "learning_rate": 0.01, "loss": 2.0194, "step": 30954 }, { "epoch": 3.1809494451294698, "grad_norm": 0.03944860398769379, "learning_rate": 0.01, "loss": 2.0166, "step": 30957 }, { "epoch": 3.181257706535142, "grad_norm": 0.04705018922686577, "learning_rate": 0.01, "loss": 2.0044, "step": 30960 }, { "epoch": 3.1815659679408137, "grad_norm": 0.04603470116853714, "learning_rate": 0.01, "loss": 1.9635, "step": 30963 }, { "epoch": 3.181874229346486, "grad_norm": 0.04761103168129921, "learning_rate": 0.01, "loss": 2.0245, "step": 30966 }, { "epoch": 3.1821824907521576, "grad_norm": 0.09109831601381302, "learning_rate": 0.01, "loss": 1.9919, "step": 30969 }, { "epoch": 3.18249075215783, "grad_norm": 0.07111615687608719, "learning_rate": 0.01, "loss": 2.0123, "step": 30972 }, { "epoch": 3.182799013563502, "grad_norm": 0.07623612880706787, "learning_rate": 0.01, "loss": 1.9949, "step": 30975 }, { "epoch": 3.183107274969174, "grad_norm": 0.0768456682562828, "learning_rate": 0.01, "loss": 2.0323, "step": 30978 }, { "epoch": 3.183415536374846, "grad_norm": 0.07147437334060669, "learning_rate": 0.01, "loss": 2.0126, "step": 30981 }, { "epoch": 3.1837237977805177, "grad_norm": 0.04732421785593033, "learning_rate": 0.01, "loss": 1.9975, "step": 30984 }, { "epoch": 3.18403205918619, "grad_norm": 0.04406769201159477, "learning_rate": 0.01, "loss": 2.0201, "step": 30987 }, { "epoch": 3.184340320591862, "grad_norm": 0.03760458901524544, "learning_rate": 0.01, "loss": 1.9994, "step": 30990 }, { "epoch": 3.184648581997534, "grad_norm": 0.051892757415771484, "learning_rate": 0.01, "loss": 2.0146, "step": 30993 }, { "epoch": 3.184956843403206, "grad_norm": 0.03961913660168648, "learning_rate": 0.01, "loss": 2.0145, "step": 30996 }, { "epoch": 3.185265104808878, "grad_norm": 0.07263363152742386, "learning_rate": 0.01, "loss": 2.0183, "step": 30999 }, { "epoch": 3.18557336621455, "grad_norm": 0.08618062734603882, "learning_rate": 0.01, "loss": 2.0074, "step": 31002 }, { "epoch": 3.1858816276202218, "grad_norm": 0.08712664991617203, "learning_rate": 0.01, "loss": 2.0084, "step": 31005 }, { "epoch": 3.186189889025894, "grad_norm": 0.05723334103822708, "learning_rate": 0.01, "loss": 2.0231, "step": 31008 }, { "epoch": 3.186498150431566, "grad_norm": 0.06170898675918579, "learning_rate": 0.01, "loss": 2.0079, "step": 31011 }, { "epoch": 3.186806411837238, "grad_norm": 0.04730013385415077, "learning_rate": 0.01, "loss": 2.0081, "step": 31014 }, { "epoch": 3.18711467324291, "grad_norm": 0.0381946824491024, "learning_rate": 0.01, "loss": 2.0194, "step": 31017 }, { "epoch": 3.187422934648582, "grad_norm": 0.09591019153594971, "learning_rate": 0.01, "loss": 1.9788, "step": 31020 }, { "epoch": 3.187731196054254, "grad_norm": 0.047843087464571, "learning_rate": 0.01, "loss": 1.9948, "step": 31023 }, { "epoch": 3.188039457459926, "grad_norm": 0.04370959475636482, "learning_rate": 0.01, "loss": 2.0249, "step": 31026 }, { "epoch": 3.188347718865598, "grad_norm": 0.060692187398672104, "learning_rate": 0.01, "loss": 2.0313, "step": 31029 }, { "epoch": 3.18865598027127, "grad_norm": 0.05906793847680092, "learning_rate": 0.01, "loss": 2.0144, "step": 31032 }, { "epoch": 3.188964241676942, "grad_norm": 0.06203675642609596, "learning_rate": 0.01, "loss": 2.0325, "step": 31035 }, { "epoch": 3.189272503082614, "grad_norm": 0.07943592220544815, "learning_rate": 0.01, "loss": 2.0079, "step": 31038 }, { "epoch": 3.189580764488286, "grad_norm": 0.08803451061248779, "learning_rate": 0.01, "loss": 2.0055, "step": 31041 }, { "epoch": 3.189889025893958, "grad_norm": 0.07365550100803375, "learning_rate": 0.01, "loss": 2.0234, "step": 31044 }, { "epoch": 3.1901972872996303, "grad_norm": 0.0795077532529831, "learning_rate": 0.01, "loss": 2.0253, "step": 31047 }, { "epoch": 3.190505548705302, "grad_norm": 0.08341194689273834, "learning_rate": 0.01, "loss": 1.9972, "step": 31050 }, { "epoch": 3.190813810110974, "grad_norm": 0.05842220410704613, "learning_rate": 0.01, "loss": 2.0244, "step": 31053 }, { "epoch": 3.191122071516646, "grad_norm": 0.09980861842632294, "learning_rate": 0.01, "loss": 2.0056, "step": 31056 }, { "epoch": 3.191430332922318, "grad_norm": 0.061474163085222244, "learning_rate": 0.01, "loss": 2.0059, "step": 31059 }, { "epoch": 3.1917385943279903, "grad_norm": 0.06752124428749084, "learning_rate": 0.01, "loss": 2.0243, "step": 31062 }, { "epoch": 3.192046855733662, "grad_norm": 0.06160835176706314, "learning_rate": 0.01, "loss": 1.9988, "step": 31065 }, { "epoch": 3.1923551171393343, "grad_norm": 0.09516782313585281, "learning_rate": 0.01, "loss": 1.9864, "step": 31068 }, { "epoch": 3.192663378545006, "grad_norm": 0.049451183527708054, "learning_rate": 0.01, "loss": 2.0311, "step": 31071 }, { "epoch": 3.1929716399506782, "grad_norm": 0.08874432742595673, "learning_rate": 0.01, "loss": 2.0198, "step": 31074 }, { "epoch": 3.19327990135635, "grad_norm": 0.07393426448106766, "learning_rate": 0.01, "loss": 1.9918, "step": 31077 }, { "epoch": 3.193588162762022, "grad_norm": 0.09064768254756927, "learning_rate": 0.01, "loss": 2.027, "step": 31080 }, { "epoch": 3.1938964241676944, "grad_norm": 0.05667688325047493, "learning_rate": 0.01, "loss": 2.0058, "step": 31083 }, { "epoch": 3.194204685573366, "grad_norm": 0.03858955577015877, "learning_rate": 0.01, "loss": 2.0184, "step": 31086 }, { "epoch": 3.1945129469790383, "grad_norm": 0.03137395530939102, "learning_rate": 0.01, "loss": 1.9912, "step": 31089 }, { "epoch": 3.19482120838471, "grad_norm": 0.09366928040981293, "learning_rate": 0.01, "loss": 2.0279, "step": 31092 }, { "epoch": 3.1951294697903823, "grad_norm": 0.05282336100935936, "learning_rate": 0.01, "loss": 2.0145, "step": 31095 }, { "epoch": 3.1954377311960545, "grad_norm": 0.07649802416563034, "learning_rate": 0.01, "loss": 1.9961, "step": 31098 }, { "epoch": 3.195745992601726, "grad_norm": 0.07249470800161362, "learning_rate": 0.01, "loss": 2.0063, "step": 31101 }, { "epoch": 3.1960542540073984, "grad_norm": 0.12900890409946442, "learning_rate": 0.01, "loss": 1.9952, "step": 31104 }, { "epoch": 3.19636251541307, "grad_norm": 0.08060257881879807, "learning_rate": 0.01, "loss": 2.0247, "step": 31107 }, { "epoch": 3.1966707768187423, "grad_norm": 0.07051622122526169, "learning_rate": 0.01, "loss": 1.9965, "step": 31110 }, { "epoch": 3.1969790382244145, "grad_norm": 0.0567597970366478, "learning_rate": 0.01, "loss": 2.0072, "step": 31113 }, { "epoch": 3.1972872996300863, "grad_norm": 0.053274448961019516, "learning_rate": 0.01, "loss": 1.9992, "step": 31116 }, { "epoch": 3.1975955610357585, "grad_norm": 0.0634993240237236, "learning_rate": 0.01, "loss": 2.0111, "step": 31119 }, { "epoch": 3.1979038224414302, "grad_norm": 0.060209862887859344, "learning_rate": 0.01, "loss": 2.0062, "step": 31122 }, { "epoch": 3.1982120838471024, "grad_norm": 0.05891990661621094, "learning_rate": 0.01, "loss": 2.0394, "step": 31125 }, { "epoch": 3.198520345252774, "grad_norm": 0.03743661195039749, "learning_rate": 0.01, "loss": 1.993, "step": 31128 }, { "epoch": 3.1988286066584464, "grad_norm": 0.047772981226444244, "learning_rate": 0.01, "loss": 1.9922, "step": 31131 }, { "epoch": 3.1991368680641186, "grad_norm": 0.09134045243263245, "learning_rate": 0.01, "loss": 2.0073, "step": 31134 }, { "epoch": 3.1994451294697903, "grad_norm": 0.040920648723840714, "learning_rate": 0.01, "loss": 1.9876, "step": 31137 }, { "epoch": 3.1997533908754625, "grad_norm": 0.09245988726615906, "learning_rate": 0.01, "loss": 1.993, "step": 31140 }, { "epoch": 3.2000616522811343, "grad_norm": 0.06823042035102844, "learning_rate": 0.01, "loss": 1.9983, "step": 31143 }, { "epoch": 3.2003699136868065, "grad_norm": 0.10324928909540176, "learning_rate": 0.01, "loss": 2.0429, "step": 31146 }, { "epoch": 3.200678175092478, "grad_norm": 0.09294021129608154, "learning_rate": 0.01, "loss": 2.0278, "step": 31149 }, { "epoch": 3.2009864364981504, "grad_norm": 0.07790663093328476, "learning_rate": 0.01, "loss": 2.0043, "step": 31152 }, { "epoch": 3.2012946979038226, "grad_norm": 0.041240107268095016, "learning_rate": 0.01, "loss": 2.0223, "step": 31155 }, { "epoch": 3.2016029593094943, "grad_norm": 0.042019765824079514, "learning_rate": 0.01, "loss": 2.0019, "step": 31158 }, { "epoch": 3.2019112207151665, "grad_norm": 0.03546491637825966, "learning_rate": 0.01, "loss": 1.9972, "step": 31161 }, { "epoch": 3.2022194821208383, "grad_norm": 0.08740135282278061, "learning_rate": 0.01, "loss": 2.0364, "step": 31164 }, { "epoch": 3.2025277435265105, "grad_norm": 0.06613611429929733, "learning_rate": 0.01, "loss": 2.0345, "step": 31167 }, { "epoch": 3.2028360049321827, "grad_norm": 0.1089286357164383, "learning_rate": 0.01, "loss": 2.0027, "step": 31170 }, { "epoch": 3.2031442663378544, "grad_norm": 0.037778157740831375, "learning_rate": 0.01, "loss": 2.0168, "step": 31173 }, { "epoch": 3.2034525277435266, "grad_norm": 0.07214018702507019, "learning_rate": 0.01, "loss": 2.0162, "step": 31176 }, { "epoch": 3.2037607891491984, "grad_norm": 0.05182633548974991, "learning_rate": 0.01, "loss": 2.0078, "step": 31179 }, { "epoch": 3.2040690505548706, "grad_norm": 0.043808627873659134, "learning_rate": 0.01, "loss": 2.0266, "step": 31182 }, { "epoch": 3.2043773119605428, "grad_norm": 0.03699186071753502, "learning_rate": 0.01, "loss": 2.0391, "step": 31185 }, { "epoch": 3.2046855733662145, "grad_norm": 0.10249976068735123, "learning_rate": 0.01, "loss": 1.9939, "step": 31188 }, { "epoch": 3.2049938347718867, "grad_norm": 0.05326079949736595, "learning_rate": 0.01, "loss": 2.0082, "step": 31191 }, { "epoch": 3.2053020961775585, "grad_norm": 0.09879110008478165, "learning_rate": 0.01, "loss": 2.0149, "step": 31194 }, { "epoch": 3.2056103575832307, "grad_norm": 0.042158786207437515, "learning_rate": 0.01, "loss": 1.9861, "step": 31197 }, { "epoch": 3.2059186189889024, "grad_norm": 0.13437750935554504, "learning_rate": 0.01, "loss": 2.0179, "step": 31200 }, { "epoch": 3.2062268803945746, "grad_norm": 0.07030022144317627, "learning_rate": 0.01, "loss": 2.0499, "step": 31203 }, { "epoch": 3.206535141800247, "grad_norm": 0.044758979231119156, "learning_rate": 0.01, "loss": 1.9956, "step": 31206 }, { "epoch": 3.2068434032059185, "grad_norm": 0.030563069507479668, "learning_rate": 0.01, "loss": 1.9987, "step": 31209 }, { "epoch": 3.2071516646115907, "grad_norm": 0.047487739473581314, "learning_rate": 0.01, "loss": 1.9763, "step": 31212 }, { "epoch": 3.2074599260172625, "grad_norm": 0.044461995363235474, "learning_rate": 0.01, "loss": 2.015, "step": 31215 }, { "epoch": 3.2077681874229347, "grad_norm": 0.16204911470413208, "learning_rate": 0.01, "loss": 2.0259, "step": 31218 }, { "epoch": 3.2080764488286064, "grad_norm": 0.0470188669860363, "learning_rate": 0.01, "loss": 2.0244, "step": 31221 }, { "epoch": 3.2083847102342786, "grad_norm": 0.04323815181851387, "learning_rate": 0.01, "loss": 2.0158, "step": 31224 }, { "epoch": 3.208692971639951, "grad_norm": 0.04388147220015526, "learning_rate": 0.01, "loss": 1.9996, "step": 31227 }, { "epoch": 3.2090012330456226, "grad_norm": 0.06811438500881195, "learning_rate": 0.01, "loss": 2.0196, "step": 31230 }, { "epoch": 3.2093094944512948, "grad_norm": 0.09423845261335373, "learning_rate": 0.01, "loss": 1.99, "step": 31233 }, { "epoch": 3.2096177558569665, "grad_norm": 0.05107983946800232, "learning_rate": 0.01, "loss": 2.0203, "step": 31236 }, { "epoch": 3.2099260172626387, "grad_norm": 0.06188793480396271, "learning_rate": 0.01, "loss": 2.0283, "step": 31239 }, { "epoch": 3.210234278668311, "grad_norm": 0.04658438265323639, "learning_rate": 0.01, "loss": 1.9986, "step": 31242 }, { "epoch": 3.2105425400739827, "grad_norm": 0.05382300913333893, "learning_rate": 0.01, "loss": 2.0242, "step": 31245 }, { "epoch": 3.210850801479655, "grad_norm": 0.04271601140499115, "learning_rate": 0.01, "loss": 1.9952, "step": 31248 }, { "epoch": 3.2111590628853266, "grad_norm": 0.06256501376628876, "learning_rate": 0.01, "loss": 2.0109, "step": 31251 }, { "epoch": 3.211467324290999, "grad_norm": 0.03523874282836914, "learning_rate": 0.01, "loss": 2.0107, "step": 31254 }, { "epoch": 3.211775585696671, "grad_norm": 0.18781809508800507, "learning_rate": 0.01, "loss": 2.0421, "step": 31257 }, { "epoch": 3.2120838471023427, "grad_norm": 0.11113902926445007, "learning_rate": 0.01, "loss": 2.0158, "step": 31260 }, { "epoch": 3.212392108508015, "grad_norm": 0.11578498035669327, "learning_rate": 0.01, "loss": 1.9993, "step": 31263 }, { "epoch": 3.2127003699136867, "grad_norm": 0.06620439887046814, "learning_rate": 0.01, "loss": 1.9711, "step": 31266 }, { "epoch": 3.213008631319359, "grad_norm": 0.04588307812809944, "learning_rate": 0.01, "loss": 2.0262, "step": 31269 }, { "epoch": 3.2133168927250306, "grad_norm": 0.06933780014514923, "learning_rate": 0.01, "loss": 2.0117, "step": 31272 }, { "epoch": 3.213625154130703, "grad_norm": 0.03479130566120148, "learning_rate": 0.01, "loss": 1.996, "step": 31275 }, { "epoch": 3.213933415536375, "grad_norm": 0.05823906138539314, "learning_rate": 0.01, "loss": 1.9988, "step": 31278 }, { "epoch": 3.2142416769420468, "grad_norm": 0.044943809509277344, "learning_rate": 0.01, "loss": 2.0121, "step": 31281 }, { "epoch": 3.214549938347719, "grad_norm": 0.05428524687886238, "learning_rate": 0.01, "loss": 1.9761, "step": 31284 }, { "epoch": 3.2148581997533907, "grad_norm": 0.07103761285543442, "learning_rate": 0.01, "loss": 2.0401, "step": 31287 }, { "epoch": 3.215166461159063, "grad_norm": 0.1237725242972374, "learning_rate": 0.01, "loss": 2.023, "step": 31290 }, { "epoch": 3.215474722564735, "grad_norm": 0.03638492897152901, "learning_rate": 0.01, "loss": 2.0036, "step": 31293 }, { "epoch": 3.215782983970407, "grad_norm": 0.05026063695549965, "learning_rate": 0.01, "loss": 1.9929, "step": 31296 }, { "epoch": 3.216091245376079, "grad_norm": 0.09602409601211548, "learning_rate": 0.01, "loss": 2.011, "step": 31299 }, { "epoch": 3.216399506781751, "grad_norm": 0.12711849808692932, "learning_rate": 0.01, "loss": 1.9912, "step": 31302 }, { "epoch": 3.216707768187423, "grad_norm": 0.051612287759780884, "learning_rate": 0.01, "loss": 2.0397, "step": 31305 }, { "epoch": 3.2170160295930947, "grad_norm": 0.055910855531692505, "learning_rate": 0.01, "loss": 2.0282, "step": 31308 }, { "epoch": 3.217324290998767, "grad_norm": 0.053995631635189056, "learning_rate": 0.01, "loss": 2.0034, "step": 31311 }, { "epoch": 3.217632552404439, "grad_norm": 0.03721768036484718, "learning_rate": 0.01, "loss": 1.9957, "step": 31314 }, { "epoch": 3.217940813810111, "grad_norm": 0.04018721356987953, "learning_rate": 0.01, "loss": 2.0215, "step": 31317 }, { "epoch": 3.218249075215783, "grad_norm": 0.06476118415594101, "learning_rate": 0.01, "loss": 2.0138, "step": 31320 }, { "epoch": 3.218557336621455, "grad_norm": 0.09766072034835815, "learning_rate": 0.01, "loss": 2.0343, "step": 31323 }, { "epoch": 3.218865598027127, "grad_norm": 0.0436365082859993, "learning_rate": 0.01, "loss": 2.0081, "step": 31326 }, { "epoch": 3.219173859432799, "grad_norm": 0.04617559164762497, "learning_rate": 0.01, "loss": 1.9809, "step": 31329 }, { "epoch": 3.219482120838471, "grad_norm": 0.0487680621445179, "learning_rate": 0.01, "loss": 2.0244, "step": 31332 }, { "epoch": 3.219790382244143, "grad_norm": 0.036623213440179825, "learning_rate": 0.01, "loss": 2.0159, "step": 31335 }, { "epoch": 3.220098643649815, "grad_norm": 0.04713229089975357, "learning_rate": 0.01, "loss": 2.01, "step": 31338 }, { "epoch": 3.220406905055487, "grad_norm": 0.03848060593008995, "learning_rate": 0.01, "loss": 1.9975, "step": 31341 }, { "epoch": 3.220715166461159, "grad_norm": 0.03674410656094551, "learning_rate": 0.01, "loss": 2.031, "step": 31344 }, { "epoch": 3.221023427866831, "grad_norm": 0.09635547548532486, "learning_rate": 0.01, "loss": 2.0069, "step": 31347 }, { "epoch": 3.2213316892725032, "grad_norm": 0.06311628222465515, "learning_rate": 0.01, "loss": 1.9864, "step": 31350 }, { "epoch": 3.221639950678175, "grad_norm": 0.05867060646414757, "learning_rate": 0.01, "loss": 2.0282, "step": 31353 }, { "epoch": 3.221948212083847, "grad_norm": 0.0816822350025177, "learning_rate": 0.01, "loss": 2.0192, "step": 31356 }, { "epoch": 3.222256473489519, "grad_norm": 0.0825878456234932, "learning_rate": 0.01, "loss": 2.0045, "step": 31359 }, { "epoch": 3.222564734895191, "grad_norm": 0.06253322958946228, "learning_rate": 0.01, "loss": 2.0461, "step": 31362 }, { "epoch": 3.2228729963008633, "grad_norm": 0.07637360692024231, "learning_rate": 0.01, "loss": 2.0028, "step": 31365 }, { "epoch": 3.223181257706535, "grad_norm": 0.081746406853199, "learning_rate": 0.01, "loss": 2.0226, "step": 31368 }, { "epoch": 3.2234895191122073, "grad_norm": 0.11683212220668793, "learning_rate": 0.01, "loss": 2.0075, "step": 31371 }, { "epoch": 3.223797780517879, "grad_norm": 0.07254528999328613, "learning_rate": 0.01, "loss": 2.0392, "step": 31374 }, { "epoch": 3.224106041923551, "grad_norm": 0.05394696444272995, "learning_rate": 0.01, "loss": 2.0199, "step": 31377 }, { "epoch": 3.2244143033292234, "grad_norm": 0.044905390590429306, "learning_rate": 0.01, "loss": 2.0156, "step": 31380 }, { "epoch": 3.224722564734895, "grad_norm": 0.07633423805236816, "learning_rate": 0.01, "loss": 2.0096, "step": 31383 }, { "epoch": 3.2250308261405674, "grad_norm": 0.03892616555094719, "learning_rate": 0.01, "loss": 2.014, "step": 31386 }, { "epoch": 3.225339087546239, "grad_norm": 0.03514908254146576, "learning_rate": 0.01, "loss": 2.0173, "step": 31389 }, { "epoch": 3.2256473489519113, "grad_norm": 0.07658538222312927, "learning_rate": 0.01, "loss": 2.0052, "step": 31392 }, { "epoch": 3.225955610357583, "grad_norm": 0.1004643440246582, "learning_rate": 0.01, "loss": 2.01, "step": 31395 }, { "epoch": 3.2262638717632552, "grad_norm": 0.08478312194347382, "learning_rate": 0.01, "loss": 2.0184, "step": 31398 }, { "epoch": 3.2265721331689274, "grad_norm": 0.05344710871577263, "learning_rate": 0.01, "loss": 1.9995, "step": 31401 }, { "epoch": 3.226880394574599, "grad_norm": 0.05862101912498474, "learning_rate": 0.01, "loss": 1.9907, "step": 31404 }, { "epoch": 3.2271886559802714, "grad_norm": 0.03430565074086189, "learning_rate": 0.01, "loss": 1.9976, "step": 31407 }, { "epoch": 3.227496917385943, "grad_norm": 0.03822310268878937, "learning_rate": 0.01, "loss": 2.0081, "step": 31410 }, { "epoch": 3.2278051787916153, "grad_norm": 0.04391561448574066, "learning_rate": 0.01, "loss": 2.0239, "step": 31413 }, { "epoch": 3.228113440197287, "grad_norm": 0.053595174103975296, "learning_rate": 0.01, "loss": 1.994, "step": 31416 }, { "epoch": 3.2284217016029593, "grad_norm": 0.06710030138492584, "learning_rate": 0.01, "loss": 2.0249, "step": 31419 }, { "epoch": 3.2287299630086315, "grad_norm": 0.19077260792255402, "learning_rate": 0.01, "loss": 2.0214, "step": 31422 }, { "epoch": 3.229038224414303, "grad_norm": 0.050228483974933624, "learning_rate": 0.01, "loss": 1.9957, "step": 31425 }, { "epoch": 3.2293464858199754, "grad_norm": 0.047246966511011124, "learning_rate": 0.01, "loss": 1.9984, "step": 31428 }, { "epoch": 3.229654747225647, "grad_norm": 0.03976801037788391, "learning_rate": 0.01, "loss": 2.0247, "step": 31431 }, { "epoch": 3.2299630086313194, "grad_norm": 0.052110929042100906, "learning_rate": 0.01, "loss": 2.0176, "step": 31434 }, { "epoch": 3.2302712700369915, "grad_norm": 0.1090363934636116, "learning_rate": 0.01, "loss": 2.011, "step": 31437 }, { "epoch": 3.2305795314426633, "grad_norm": 0.13336141407489777, "learning_rate": 0.01, "loss": 2.0023, "step": 31440 }, { "epoch": 3.2308877928483355, "grad_norm": 0.11287815868854523, "learning_rate": 0.01, "loss": 2.0384, "step": 31443 }, { "epoch": 3.2311960542540072, "grad_norm": 0.05775724723935127, "learning_rate": 0.01, "loss": 2.0001, "step": 31446 }, { "epoch": 3.2315043156596794, "grad_norm": 0.0489221066236496, "learning_rate": 0.01, "loss": 1.9857, "step": 31449 }, { "epoch": 3.2318125770653516, "grad_norm": 0.10708906501531601, "learning_rate": 0.01, "loss": 2.0214, "step": 31452 }, { "epoch": 3.2321208384710234, "grad_norm": 0.04961223527789116, "learning_rate": 0.01, "loss": 2.0244, "step": 31455 }, { "epoch": 3.2324290998766956, "grad_norm": 0.04106178134679794, "learning_rate": 0.01, "loss": 2.027, "step": 31458 }, { "epoch": 3.2327373612823673, "grad_norm": 0.051446568220853806, "learning_rate": 0.01, "loss": 2.0078, "step": 31461 }, { "epoch": 3.2330456226880395, "grad_norm": 0.046735066920518875, "learning_rate": 0.01, "loss": 2.0196, "step": 31464 }, { "epoch": 3.2333538840937113, "grad_norm": 0.04740822687745094, "learning_rate": 0.01, "loss": 1.9849, "step": 31467 }, { "epoch": 3.2336621454993835, "grad_norm": 0.03593340888619423, "learning_rate": 0.01, "loss": 2.0055, "step": 31470 }, { "epoch": 3.2339704069050557, "grad_norm": 0.058127082884311676, "learning_rate": 0.01, "loss": 2.0318, "step": 31473 }, { "epoch": 3.2342786683107274, "grad_norm": 0.05138585716485977, "learning_rate": 0.01, "loss": 2.0052, "step": 31476 }, { "epoch": 3.2345869297163996, "grad_norm": 0.04029039293527603, "learning_rate": 0.01, "loss": 2.0272, "step": 31479 }, { "epoch": 3.2348951911220714, "grad_norm": 0.05703110247850418, "learning_rate": 0.01, "loss": 2.0115, "step": 31482 }, { "epoch": 3.2352034525277436, "grad_norm": 0.06218143180012703, "learning_rate": 0.01, "loss": 1.9987, "step": 31485 }, { "epoch": 3.2355117139334153, "grad_norm": 0.07277306169271469, "learning_rate": 0.01, "loss": 2.0413, "step": 31488 }, { "epoch": 3.2358199753390875, "grad_norm": 0.08520001918077469, "learning_rate": 0.01, "loss": 2.0058, "step": 31491 }, { "epoch": 3.2361282367447597, "grad_norm": 0.062044426798820496, "learning_rate": 0.01, "loss": 2.0055, "step": 31494 }, { "epoch": 3.2364364981504314, "grad_norm": 0.062475502490997314, "learning_rate": 0.01, "loss": 2.0124, "step": 31497 }, { "epoch": 3.2367447595561036, "grad_norm": 0.05683014541864395, "learning_rate": 0.01, "loss": 2.0253, "step": 31500 }, { "epoch": 3.2370530209617754, "grad_norm": 0.0409809909760952, "learning_rate": 0.01, "loss": 2.0286, "step": 31503 }, { "epoch": 3.2373612823674476, "grad_norm": 0.05398055911064148, "learning_rate": 0.01, "loss": 2.0239, "step": 31506 }, { "epoch": 3.2376695437731198, "grad_norm": 0.09552880376577377, "learning_rate": 0.01, "loss": 2.0232, "step": 31509 }, { "epoch": 3.2379778051787915, "grad_norm": 0.042877551168203354, "learning_rate": 0.01, "loss": 2.007, "step": 31512 }, { "epoch": 3.2382860665844637, "grad_norm": 0.03086467832326889, "learning_rate": 0.01, "loss": 1.9831, "step": 31515 }, { "epoch": 3.2385943279901355, "grad_norm": 0.03575371578335762, "learning_rate": 0.01, "loss": 1.9614, "step": 31518 }, { "epoch": 3.2389025893958077, "grad_norm": 0.04962699115276337, "learning_rate": 0.01, "loss": 2.0084, "step": 31521 }, { "epoch": 3.23921085080148, "grad_norm": 0.07324356585741043, "learning_rate": 0.01, "loss": 2.0213, "step": 31524 }, { "epoch": 3.2395191122071516, "grad_norm": 0.08169112354516983, "learning_rate": 0.01, "loss": 2.0061, "step": 31527 }, { "epoch": 3.239827373612824, "grad_norm": 0.04129362106323242, "learning_rate": 0.01, "loss": 2.0143, "step": 31530 }, { "epoch": 3.2401356350184956, "grad_norm": 0.04921339079737663, "learning_rate": 0.01, "loss": 2.0248, "step": 31533 }, { "epoch": 3.2404438964241677, "grad_norm": 0.048255033791065216, "learning_rate": 0.01, "loss": 2.0486, "step": 31536 }, { "epoch": 3.2407521578298395, "grad_norm": 0.0420132540166378, "learning_rate": 0.01, "loss": 2.0254, "step": 31539 }, { "epoch": 3.2410604192355117, "grad_norm": 0.037761542946100235, "learning_rate": 0.01, "loss": 2.0105, "step": 31542 }, { "epoch": 3.241368680641184, "grad_norm": 0.059902340173721313, "learning_rate": 0.01, "loss": 2.0371, "step": 31545 }, { "epoch": 3.2416769420468556, "grad_norm": 0.07208621501922607, "learning_rate": 0.01, "loss": 2.014, "step": 31548 }, { "epoch": 3.241985203452528, "grad_norm": 0.039220135658979416, "learning_rate": 0.01, "loss": 2.0208, "step": 31551 }, { "epoch": 3.2422934648581996, "grad_norm": 0.09032812714576721, "learning_rate": 0.01, "loss": 1.9956, "step": 31554 }, { "epoch": 3.2426017262638718, "grad_norm": 0.04668070003390312, "learning_rate": 0.01, "loss": 2.0204, "step": 31557 }, { "epoch": 3.242909987669544, "grad_norm": 0.07167590409517288, "learning_rate": 0.01, "loss": 1.9882, "step": 31560 }, { "epoch": 3.2432182490752157, "grad_norm": 0.09092319756746292, "learning_rate": 0.01, "loss": 2.0257, "step": 31563 }, { "epoch": 3.243526510480888, "grad_norm": 0.05797998234629631, "learning_rate": 0.01, "loss": 1.9923, "step": 31566 }, { "epoch": 3.2438347718865597, "grad_norm": 0.05728116258978844, "learning_rate": 0.01, "loss": 2.0134, "step": 31569 }, { "epoch": 3.244143033292232, "grad_norm": 0.10221456736326218, "learning_rate": 0.01, "loss": 2.0452, "step": 31572 }, { "epoch": 3.244451294697904, "grad_norm": 0.0785583034157753, "learning_rate": 0.01, "loss": 1.9956, "step": 31575 }, { "epoch": 3.244759556103576, "grad_norm": 0.06105490028858185, "learning_rate": 0.01, "loss": 2.035, "step": 31578 }, { "epoch": 3.245067817509248, "grad_norm": 0.031959742307662964, "learning_rate": 0.01, "loss": 2.0186, "step": 31581 }, { "epoch": 3.2453760789149197, "grad_norm": 0.07373019307851791, "learning_rate": 0.01, "loss": 1.9778, "step": 31584 }, { "epoch": 3.245684340320592, "grad_norm": 0.05989330634474754, "learning_rate": 0.01, "loss": 2.0059, "step": 31587 }, { "epoch": 3.2459926017262637, "grad_norm": 0.0927957072854042, "learning_rate": 0.01, "loss": 1.9979, "step": 31590 }, { "epoch": 3.246300863131936, "grad_norm": 0.048555102199316025, "learning_rate": 0.01, "loss": 2.0111, "step": 31593 }, { "epoch": 3.246609124537608, "grad_norm": 0.05515364184975624, "learning_rate": 0.01, "loss": 2.0052, "step": 31596 }, { "epoch": 3.24691738594328, "grad_norm": 0.04753278195858002, "learning_rate": 0.01, "loss": 2.0171, "step": 31599 }, { "epoch": 3.247225647348952, "grad_norm": 0.06210014224052429, "learning_rate": 0.01, "loss": 1.9689, "step": 31602 }, { "epoch": 3.2475339087546238, "grad_norm": 0.060913342982530594, "learning_rate": 0.01, "loss": 2.0183, "step": 31605 }, { "epoch": 3.247842170160296, "grad_norm": 0.08301867544651031, "learning_rate": 0.01, "loss": 2.0182, "step": 31608 }, { "epoch": 3.2481504315659677, "grad_norm": 0.06884342432022095, "learning_rate": 0.01, "loss": 2.0332, "step": 31611 }, { "epoch": 3.24845869297164, "grad_norm": 0.046835094690322876, "learning_rate": 0.01, "loss": 2.0099, "step": 31614 }, { "epoch": 3.248766954377312, "grad_norm": 0.11094243824481964, "learning_rate": 0.01, "loss": 2.0319, "step": 31617 }, { "epoch": 3.249075215782984, "grad_norm": 0.0603213869035244, "learning_rate": 0.01, "loss": 2.0181, "step": 31620 }, { "epoch": 3.249383477188656, "grad_norm": 0.07283317297697067, "learning_rate": 0.01, "loss": 2.0205, "step": 31623 }, { "epoch": 3.249691738594328, "grad_norm": 0.05507282167673111, "learning_rate": 0.01, "loss": 2.0082, "step": 31626 }, { "epoch": 3.25, "grad_norm": 0.039191924035549164, "learning_rate": 0.01, "loss": 1.9936, "step": 31629 }, { "epoch": 3.250308261405672, "grad_norm": 0.1467886120080948, "learning_rate": 0.01, "loss": 2.0235, "step": 31632 }, { "epoch": 3.250616522811344, "grad_norm": 0.06955720484256744, "learning_rate": 0.01, "loss": 2.0168, "step": 31635 }, { "epoch": 3.250924784217016, "grad_norm": 0.0634884312748909, "learning_rate": 0.01, "loss": 2.0123, "step": 31638 }, { "epoch": 3.251233045622688, "grad_norm": 0.035188958048820496, "learning_rate": 0.01, "loss": 2.0061, "step": 31641 }, { "epoch": 3.25154130702836, "grad_norm": 0.07601841539144516, "learning_rate": 0.01, "loss": 2.0046, "step": 31644 }, { "epoch": 3.2518495684340323, "grad_norm": 0.06929823011159897, "learning_rate": 0.01, "loss": 2.0213, "step": 31647 }, { "epoch": 3.252157829839704, "grad_norm": 0.06300003081560135, "learning_rate": 0.01, "loss": 2.0111, "step": 31650 }, { "epoch": 3.2524660912453762, "grad_norm": 0.04881738871335983, "learning_rate": 0.01, "loss": 2.0423, "step": 31653 }, { "epoch": 3.252774352651048, "grad_norm": 0.04084230959415436, "learning_rate": 0.01, "loss": 2.0284, "step": 31656 }, { "epoch": 3.25308261405672, "grad_norm": 0.10158465802669525, "learning_rate": 0.01, "loss": 2.0084, "step": 31659 }, { "epoch": 3.253390875462392, "grad_norm": 0.04703471064567566, "learning_rate": 0.01, "loss": 1.9824, "step": 31662 }, { "epoch": 3.253699136868064, "grad_norm": 0.11601495742797852, "learning_rate": 0.01, "loss": 2.0249, "step": 31665 }, { "epoch": 3.2540073982737363, "grad_norm": 0.037081558257341385, "learning_rate": 0.01, "loss": 2.0104, "step": 31668 }, { "epoch": 3.254315659679408, "grad_norm": 0.03952954337000847, "learning_rate": 0.01, "loss": 2.023, "step": 31671 }, { "epoch": 3.2546239210850803, "grad_norm": 0.09437094628810883, "learning_rate": 0.01, "loss": 2.0528, "step": 31674 }, { "epoch": 3.254932182490752, "grad_norm": 0.04648669436573982, "learning_rate": 0.01, "loss": 1.9822, "step": 31677 }, { "epoch": 3.255240443896424, "grad_norm": 0.06830704212188721, "learning_rate": 0.01, "loss": 2.0029, "step": 31680 }, { "epoch": 3.255548705302096, "grad_norm": 0.039575349539518356, "learning_rate": 0.01, "loss": 2.0177, "step": 31683 }, { "epoch": 3.255856966707768, "grad_norm": 0.046931225806474686, "learning_rate": 0.01, "loss": 2.0081, "step": 31686 }, { "epoch": 3.2561652281134403, "grad_norm": 0.039921220391988754, "learning_rate": 0.01, "loss": 1.9982, "step": 31689 }, { "epoch": 3.256473489519112, "grad_norm": 0.1725001186132431, "learning_rate": 0.01, "loss": 1.9933, "step": 31692 }, { "epoch": 3.2567817509247843, "grad_norm": 0.10149878263473511, "learning_rate": 0.01, "loss": 2.0031, "step": 31695 }, { "epoch": 3.2570900123304565, "grad_norm": 0.0641789436340332, "learning_rate": 0.01, "loss": 2.0298, "step": 31698 }, { "epoch": 3.2573982737361282, "grad_norm": 0.09631607681512833, "learning_rate": 0.01, "loss": 2.0369, "step": 31701 }, { "epoch": 3.2577065351418004, "grad_norm": 0.04941624775528908, "learning_rate": 0.01, "loss": 2.0078, "step": 31704 }, { "epoch": 3.258014796547472, "grad_norm": 0.03704220801591873, "learning_rate": 0.01, "loss": 2.0192, "step": 31707 }, { "epoch": 3.2583230579531444, "grad_norm": 0.05716734007000923, "learning_rate": 0.01, "loss": 2.0117, "step": 31710 }, { "epoch": 3.258631319358816, "grad_norm": 0.04187513142824173, "learning_rate": 0.01, "loss": 2.0298, "step": 31713 }, { "epoch": 3.2589395807644883, "grad_norm": 0.04240023344755173, "learning_rate": 0.01, "loss": 2.0227, "step": 31716 }, { "epoch": 3.2592478421701605, "grad_norm": 0.0434168316423893, "learning_rate": 0.01, "loss": 2.0293, "step": 31719 }, { "epoch": 3.2595561035758323, "grad_norm": 0.10827293992042542, "learning_rate": 0.01, "loss": 2.019, "step": 31722 }, { "epoch": 3.2598643649815044, "grad_norm": 0.04294963926076889, "learning_rate": 0.01, "loss": 2.0289, "step": 31725 }, { "epoch": 3.260172626387176, "grad_norm": 0.11746285110712051, "learning_rate": 0.01, "loss": 2.0168, "step": 31728 }, { "epoch": 3.2604808877928484, "grad_norm": 0.13203038275241852, "learning_rate": 0.01, "loss": 2.0092, "step": 31731 }, { "epoch": 3.26078914919852, "grad_norm": 0.12324203550815582, "learning_rate": 0.01, "loss": 2.0158, "step": 31734 }, { "epoch": 3.2610974106041923, "grad_norm": 0.09455161541700363, "learning_rate": 0.01, "loss": 2.0113, "step": 31737 }, { "epoch": 3.2614056720098645, "grad_norm": 0.0551525354385376, "learning_rate": 0.01, "loss": 2.0042, "step": 31740 }, { "epoch": 3.2617139334155363, "grad_norm": 0.04528261721134186, "learning_rate": 0.01, "loss": 2.0083, "step": 31743 }, { "epoch": 3.2620221948212085, "grad_norm": 0.049497511237859726, "learning_rate": 0.01, "loss": 1.9953, "step": 31746 }, { "epoch": 3.2623304562268802, "grad_norm": 0.04797462373971939, "learning_rate": 0.01, "loss": 1.9974, "step": 31749 }, { "epoch": 3.2626387176325524, "grad_norm": 0.047107502818107605, "learning_rate": 0.01, "loss": 1.99, "step": 31752 }, { "epoch": 3.262946979038224, "grad_norm": 0.03374217078089714, "learning_rate": 0.01, "loss": 1.98, "step": 31755 }, { "epoch": 3.2632552404438964, "grad_norm": 0.05429157614707947, "learning_rate": 0.01, "loss": 2.0089, "step": 31758 }, { "epoch": 3.2635635018495686, "grad_norm": 0.12066765129566193, "learning_rate": 0.01, "loss": 2.0365, "step": 31761 }, { "epoch": 3.2638717632552403, "grad_norm": 0.15858452022075653, "learning_rate": 0.01, "loss": 2.021, "step": 31764 }, { "epoch": 3.2641800246609125, "grad_norm": 0.05770573392510414, "learning_rate": 0.01, "loss": 2.0513, "step": 31767 }, { "epoch": 3.2644882860665847, "grad_norm": 0.053706735372543335, "learning_rate": 0.01, "loss": 1.9939, "step": 31770 }, { "epoch": 3.2647965474722564, "grad_norm": 0.03511708602309227, "learning_rate": 0.01, "loss": 2.0108, "step": 31773 }, { "epoch": 3.2651048088779286, "grad_norm": 0.036902979016304016, "learning_rate": 0.01, "loss": 2.0264, "step": 31776 }, { "epoch": 3.2654130702836004, "grad_norm": 0.0450621172785759, "learning_rate": 0.01, "loss": 2.0203, "step": 31779 }, { "epoch": 3.2657213316892726, "grad_norm": 0.07691927254199982, "learning_rate": 0.01, "loss": 2.0167, "step": 31782 }, { "epoch": 3.2660295930949443, "grad_norm": 0.09160054475069046, "learning_rate": 0.01, "loss": 2.0221, "step": 31785 }, { "epoch": 3.2663378545006165, "grad_norm": 0.09052203595638275, "learning_rate": 0.01, "loss": 2.0185, "step": 31788 }, { "epoch": 3.2666461159062887, "grad_norm": 0.08806884288787842, "learning_rate": 0.01, "loss": 2.0103, "step": 31791 }, { "epoch": 3.2669543773119605, "grad_norm": 0.05619393661618233, "learning_rate": 0.01, "loss": 1.9998, "step": 31794 }, { "epoch": 3.2672626387176327, "grad_norm": 0.05563674122095108, "learning_rate": 0.01, "loss": 2.015, "step": 31797 }, { "epoch": 3.2675709001233044, "grad_norm": 0.058199040591716766, "learning_rate": 0.01, "loss": 2.014, "step": 31800 }, { "epoch": 3.2678791615289766, "grad_norm": 0.06011686101555824, "learning_rate": 0.01, "loss": 2.0103, "step": 31803 }, { "epoch": 3.2681874229346484, "grad_norm": 0.05391063541173935, "learning_rate": 0.01, "loss": 2.0047, "step": 31806 }, { "epoch": 3.2684956843403206, "grad_norm": 0.1030382513999939, "learning_rate": 0.01, "loss": 2.0132, "step": 31809 }, { "epoch": 3.2688039457459928, "grad_norm": 0.03978987783193588, "learning_rate": 0.01, "loss": 2.0099, "step": 31812 }, { "epoch": 3.2691122071516645, "grad_norm": 0.08974360674619675, "learning_rate": 0.01, "loss": 1.9975, "step": 31815 }, { "epoch": 3.2694204685573367, "grad_norm": 0.07530324161052704, "learning_rate": 0.01, "loss": 2.0224, "step": 31818 }, { "epoch": 3.2697287299630085, "grad_norm": 0.04715275764465332, "learning_rate": 0.01, "loss": 2.0011, "step": 31821 }, { "epoch": 3.2700369913686806, "grad_norm": 0.050625238567590714, "learning_rate": 0.01, "loss": 1.9988, "step": 31824 }, { "epoch": 3.270345252774353, "grad_norm": 0.05567210912704468, "learning_rate": 0.01, "loss": 2.0554, "step": 31827 }, { "epoch": 3.2706535141800246, "grad_norm": 0.09813915193080902, "learning_rate": 0.01, "loss": 1.9994, "step": 31830 }, { "epoch": 3.270961775585697, "grad_norm": 0.08543343842029572, "learning_rate": 0.01, "loss": 2.0405, "step": 31833 }, { "epoch": 3.2712700369913685, "grad_norm": 0.03340763971209526, "learning_rate": 0.01, "loss": 2.0302, "step": 31836 }, { "epoch": 3.2715782983970407, "grad_norm": 0.0335637666285038, "learning_rate": 0.01, "loss": 2.0286, "step": 31839 }, { "epoch": 3.271886559802713, "grad_norm": 0.06983290612697601, "learning_rate": 0.01, "loss": 2.0155, "step": 31842 }, { "epoch": 3.2721948212083847, "grad_norm": 0.1157732903957367, "learning_rate": 0.01, "loss": 2.0133, "step": 31845 }, { "epoch": 3.272503082614057, "grad_norm": 0.11864369362592697, "learning_rate": 0.01, "loss": 2.0367, "step": 31848 }, { "epoch": 3.2728113440197286, "grad_norm": 0.09083148092031479, "learning_rate": 0.01, "loss": 2.025, "step": 31851 }, { "epoch": 3.273119605425401, "grad_norm": 0.06563573330640793, "learning_rate": 0.01, "loss": 2.0175, "step": 31854 }, { "epoch": 3.2734278668310726, "grad_norm": 0.043879635632038116, "learning_rate": 0.01, "loss": 2.0288, "step": 31857 }, { "epoch": 3.2737361282367448, "grad_norm": 0.03548846393823624, "learning_rate": 0.01, "loss": 2.0246, "step": 31860 }, { "epoch": 3.274044389642417, "grad_norm": 0.036084435880184174, "learning_rate": 0.01, "loss": 2.0007, "step": 31863 }, { "epoch": 3.2743526510480887, "grad_norm": 0.03813619166612625, "learning_rate": 0.01, "loss": 2.0024, "step": 31866 }, { "epoch": 3.274660912453761, "grad_norm": 0.05276734009385109, "learning_rate": 0.01, "loss": 1.9945, "step": 31869 }, { "epoch": 3.2749691738594326, "grad_norm": 0.07557803392410278, "learning_rate": 0.01, "loss": 2.0038, "step": 31872 }, { "epoch": 3.275277435265105, "grad_norm": 0.060760460793972015, "learning_rate": 0.01, "loss": 2.0283, "step": 31875 }, { "epoch": 3.2755856966707766, "grad_norm": 0.11498884111642838, "learning_rate": 0.01, "loss": 2.0256, "step": 31878 }, { "epoch": 3.275893958076449, "grad_norm": 0.12193593382835388, "learning_rate": 0.01, "loss": 1.9791, "step": 31881 }, { "epoch": 3.276202219482121, "grad_norm": 0.10637890547513962, "learning_rate": 0.01, "loss": 2.0055, "step": 31884 }, { "epoch": 3.2765104808877927, "grad_norm": 0.08633279800415039, "learning_rate": 0.01, "loss": 2.0151, "step": 31887 }, { "epoch": 3.276818742293465, "grad_norm": 0.08105628192424774, "learning_rate": 0.01, "loss": 2.0124, "step": 31890 }, { "epoch": 3.2771270036991367, "grad_norm": 0.03692932799458504, "learning_rate": 0.01, "loss": 2.0134, "step": 31893 }, { "epoch": 3.277435265104809, "grad_norm": 0.03064770996570587, "learning_rate": 0.01, "loss": 2.0088, "step": 31896 }, { "epoch": 3.277743526510481, "grad_norm": 0.038218267261981964, "learning_rate": 0.01, "loss": 2.0045, "step": 31899 }, { "epoch": 3.278051787916153, "grad_norm": 0.05573924258351326, "learning_rate": 0.01, "loss": 2.0289, "step": 31902 }, { "epoch": 3.278360049321825, "grad_norm": 0.14293062686920166, "learning_rate": 0.01, "loss": 2.0147, "step": 31905 }, { "epoch": 3.2786683107274968, "grad_norm": 0.082049660384655, "learning_rate": 0.01, "loss": 2.0159, "step": 31908 }, { "epoch": 3.278976572133169, "grad_norm": 0.08122528344392776, "learning_rate": 0.01, "loss": 2.0043, "step": 31911 }, { "epoch": 3.279284833538841, "grad_norm": 0.054671067744493484, "learning_rate": 0.01, "loss": 2.0054, "step": 31914 }, { "epoch": 3.279593094944513, "grad_norm": 0.11112997680902481, "learning_rate": 0.01, "loss": 1.9826, "step": 31917 }, { "epoch": 3.279901356350185, "grad_norm": 0.12811101973056793, "learning_rate": 0.01, "loss": 2.0242, "step": 31920 }, { "epoch": 3.280209617755857, "grad_norm": 0.07637016475200653, "learning_rate": 0.01, "loss": 1.9879, "step": 31923 }, { "epoch": 3.280517879161529, "grad_norm": 0.03716239705681801, "learning_rate": 0.01, "loss": 2.0055, "step": 31926 }, { "epoch": 3.280826140567201, "grad_norm": 0.03950963541865349, "learning_rate": 0.01, "loss": 1.9905, "step": 31929 }, { "epoch": 3.281134401972873, "grad_norm": 0.09012940526008606, "learning_rate": 0.01, "loss": 2.0422, "step": 31932 }, { "epoch": 3.281442663378545, "grad_norm": 0.04000500217080116, "learning_rate": 0.01, "loss": 2.0054, "step": 31935 }, { "epoch": 3.281750924784217, "grad_norm": 0.06817129254341125, "learning_rate": 0.01, "loss": 2.0131, "step": 31938 }, { "epoch": 3.282059186189889, "grad_norm": 0.14285409450531006, "learning_rate": 0.01, "loss": 2.0235, "step": 31941 }, { "epoch": 3.282367447595561, "grad_norm": 0.03885696083307266, "learning_rate": 0.01, "loss": 2.007, "step": 31944 }, { "epoch": 3.282675709001233, "grad_norm": 0.047379713505506516, "learning_rate": 0.01, "loss": 1.9929, "step": 31947 }, { "epoch": 3.282983970406905, "grad_norm": 0.05476104095578194, "learning_rate": 0.01, "loss": 2.0199, "step": 31950 }, { "epoch": 3.283292231812577, "grad_norm": 0.03158089146018028, "learning_rate": 0.01, "loss": 2.0053, "step": 31953 }, { "epoch": 3.283600493218249, "grad_norm": 0.04537857696413994, "learning_rate": 0.01, "loss": 1.991, "step": 31956 }, { "epoch": 3.283908754623921, "grad_norm": 0.1525343656539917, "learning_rate": 0.01, "loss": 1.9972, "step": 31959 }, { "epoch": 3.284217016029593, "grad_norm": 0.11916167289018631, "learning_rate": 0.01, "loss": 2.0089, "step": 31962 }, { "epoch": 3.2845252774352653, "grad_norm": 0.09215250611305237, "learning_rate": 0.01, "loss": 1.9746, "step": 31965 }, { "epoch": 3.284833538840937, "grad_norm": 0.053620483726263046, "learning_rate": 0.01, "loss": 2.0148, "step": 31968 }, { "epoch": 3.2851418002466093, "grad_norm": 0.03575912117958069, "learning_rate": 0.01, "loss": 2.0237, "step": 31971 }, { "epoch": 3.285450061652281, "grad_norm": 0.09148744493722916, "learning_rate": 0.01, "loss": 2.0179, "step": 31974 }, { "epoch": 3.2857583230579532, "grad_norm": 0.06433127075433731, "learning_rate": 0.01, "loss": 2.0172, "step": 31977 }, { "epoch": 3.286066584463625, "grad_norm": 0.04605916887521744, "learning_rate": 0.01, "loss": 1.9956, "step": 31980 }, { "epoch": 3.286374845869297, "grad_norm": 0.07150716334581375, "learning_rate": 0.01, "loss": 2.0251, "step": 31983 }, { "epoch": 3.2866831072749694, "grad_norm": 0.04914524033665657, "learning_rate": 0.01, "loss": 2.0002, "step": 31986 }, { "epoch": 3.286991368680641, "grad_norm": 0.10281821340322495, "learning_rate": 0.01, "loss": 2.0137, "step": 31989 }, { "epoch": 3.2872996300863133, "grad_norm": 0.04685597866773605, "learning_rate": 0.01, "loss": 2.0283, "step": 31992 }, { "epoch": 3.287607891491985, "grad_norm": 0.04735150560736656, "learning_rate": 0.01, "loss": 2.005, "step": 31995 }, { "epoch": 3.2879161528976573, "grad_norm": 0.07897822558879852, "learning_rate": 0.01, "loss": 2.0123, "step": 31998 }, { "epoch": 3.288224414303329, "grad_norm": 0.08091110736131668, "learning_rate": 0.01, "loss": 2.0397, "step": 32001 }, { "epoch": 3.288532675709001, "grad_norm": 0.038828156888484955, "learning_rate": 0.01, "loss": 2.0245, "step": 32004 }, { "epoch": 3.2888409371146734, "grad_norm": 0.11410044133663177, "learning_rate": 0.01, "loss": 2.0081, "step": 32007 }, { "epoch": 3.289149198520345, "grad_norm": 0.11741339415311813, "learning_rate": 0.01, "loss": 2.0055, "step": 32010 }, { "epoch": 3.2894574599260173, "grad_norm": 0.054089032113552094, "learning_rate": 0.01, "loss": 2.0179, "step": 32013 }, { "epoch": 3.289765721331689, "grad_norm": 0.034770797938108444, "learning_rate": 0.01, "loss": 2.0148, "step": 32016 }, { "epoch": 3.2900739827373613, "grad_norm": 0.06313812732696533, "learning_rate": 0.01, "loss": 2.0294, "step": 32019 }, { "epoch": 3.2903822441430335, "grad_norm": 0.05844837799668312, "learning_rate": 0.01, "loss": 2.0227, "step": 32022 }, { "epoch": 3.2906905055487052, "grad_norm": 0.0374666191637516, "learning_rate": 0.01, "loss": 2.0342, "step": 32025 }, { "epoch": 3.2909987669543774, "grad_norm": 0.05456427484750748, "learning_rate": 0.01, "loss": 1.9794, "step": 32028 }, { "epoch": 3.291307028360049, "grad_norm": 0.11318197846412659, "learning_rate": 0.01, "loss": 1.9995, "step": 32031 }, { "epoch": 3.2916152897657214, "grad_norm": 0.04832073673605919, "learning_rate": 0.01, "loss": 2.0108, "step": 32034 }, { "epoch": 3.2919235511713936, "grad_norm": 0.07790713757276535, "learning_rate": 0.01, "loss": 1.9893, "step": 32037 }, { "epoch": 3.2922318125770653, "grad_norm": 0.05794338509440422, "learning_rate": 0.01, "loss": 2.0029, "step": 32040 }, { "epoch": 3.2925400739827375, "grad_norm": 0.06488461047410965, "learning_rate": 0.01, "loss": 1.9965, "step": 32043 }, { "epoch": 3.2928483353884093, "grad_norm": 0.10804028809070587, "learning_rate": 0.01, "loss": 1.9938, "step": 32046 }, { "epoch": 3.2931565967940815, "grad_norm": 0.04927225038409233, "learning_rate": 0.01, "loss": 2.0111, "step": 32049 }, { "epoch": 3.293464858199753, "grad_norm": 0.039539139717817307, "learning_rate": 0.01, "loss": 2.0129, "step": 32052 }, { "epoch": 3.2937731196054254, "grad_norm": 0.046698734164237976, "learning_rate": 0.01, "loss": 2.0149, "step": 32055 }, { "epoch": 3.2940813810110976, "grad_norm": 0.07978003472089767, "learning_rate": 0.01, "loss": 2.0001, "step": 32058 }, { "epoch": 3.2943896424167693, "grad_norm": 0.05909251049160957, "learning_rate": 0.01, "loss": 2.018, "step": 32061 }, { "epoch": 3.2946979038224415, "grad_norm": 0.09814689308404922, "learning_rate": 0.01, "loss": 1.9935, "step": 32064 }, { "epoch": 3.2950061652281133, "grad_norm": 0.07647134363651276, "learning_rate": 0.01, "loss": 2.0316, "step": 32067 }, { "epoch": 3.2953144266337855, "grad_norm": 0.09203072637319565, "learning_rate": 0.01, "loss": 2.0075, "step": 32070 }, { "epoch": 3.2956226880394572, "grad_norm": 0.14764010906219482, "learning_rate": 0.01, "loss": 2.0052, "step": 32073 }, { "epoch": 3.2959309494451294, "grad_norm": 0.07483326643705368, "learning_rate": 0.01, "loss": 2.0065, "step": 32076 }, { "epoch": 3.2962392108508016, "grad_norm": 0.03683464601635933, "learning_rate": 0.01, "loss": 2.0092, "step": 32079 }, { "epoch": 3.2965474722564734, "grad_norm": 0.0658450499176979, "learning_rate": 0.01, "loss": 2.0134, "step": 32082 }, { "epoch": 3.2968557336621456, "grad_norm": 0.05505736172199249, "learning_rate": 0.01, "loss": 1.9978, "step": 32085 }, { "epoch": 3.2971639950678173, "grad_norm": 0.07295443117618561, "learning_rate": 0.01, "loss": 2.0444, "step": 32088 }, { "epoch": 3.2974722564734895, "grad_norm": 0.03865521401166916, "learning_rate": 0.01, "loss": 2.0084, "step": 32091 }, { "epoch": 3.2977805178791617, "grad_norm": 0.12185568362474442, "learning_rate": 0.01, "loss": 2.0011, "step": 32094 }, { "epoch": 3.2980887792848335, "grad_norm": 0.04646170511841774, "learning_rate": 0.01, "loss": 1.9925, "step": 32097 }, { "epoch": 3.2983970406905057, "grad_norm": 0.10177022218704224, "learning_rate": 0.01, "loss": 2.0001, "step": 32100 }, { "epoch": 3.2987053020961774, "grad_norm": 0.04585393890738487, "learning_rate": 0.01, "loss": 2.0032, "step": 32103 }, { "epoch": 3.2990135635018496, "grad_norm": 0.1089714989066124, "learning_rate": 0.01, "loss": 1.9943, "step": 32106 }, { "epoch": 3.299321824907522, "grad_norm": 0.04579438269138336, "learning_rate": 0.01, "loss": 2.0241, "step": 32109 }, { "epoch": 3.2996300863131935, "grad_norm": 0.0699036568403244, "learning_rate": 0.01, "loss": 2.025, "step": 32112 }, { "epoch": 3.2999383477188657, "grad_norm": 0.10070015490055084, "learning_rate": 0.01, "loss": 2.0037, "step": 32115 }, { "epoch": 3.3002466091245375, "grad_norm": 0.08273176848888397, "learning_rate": 0.01, "loss": 2.0368, "step": 32118 }, { "epoch": 3.3005548705302097, "grad_norm": 0.08648907393217087, "learning_rate": 0.01, "loss": 2.0069, "step": 32121 }, { "epoch": 3.3008631319358814, "grad_norm": 0.059946708381175995, "learning_rate": 0.01, "loss": 2.0023, "step": 32124 }, { "epoch": 3.3011713933415536, "grad_norm": 0.034971099346876144, "learning_rate": 0.01, "loss": 1.9999, "step": 32127 }, { "epoch": 3.301479654747226, "grad_norm": 0.12256456911563873, "learning_rate": 0.01, "loss": 1.9763, "step": 32130 }, { "epoch": 3.3017879161528976, "grad_norm": 0.041138794273138046, "learning_rate": 0.01, "loss": 2.0417, "step": 32133 }, { "epoch": 3.3020961775585698, "grad_norm": 0.04145865887403488, "learning_rate": 0.01, "loss": 1.9986, "step": 32136 }, { "epoch": 3.3024044389642415, "grad_norm": 0.07052136212587357, "learning_rate": 0.01, "loss": 1.9986, "step": 32139 }, { "epoch": 3.3027127003699137, "grad_norm": 0.05553466081619263, "learning_rate": 0.01, "loss": 1.9964, "step": 32142 }, { "epoch": 3.3030209617755855, "grad_norm": 0.038209814578294754, "learning_rate": 0.01, "loss": 2.0056, "step": 32145 }, { "epoch": 3.3033292231812577, "grad_norm": 0.04882222041487694, "learning_rate": 0.01, "loss": 2.019, "step": 32148 }, { "epoch": 3.30363748458693, "grad_norm": 0.11157884448766708, "learning_rate": 0.01, "loss": 2.0012, "step": 32151 }, { "epoch": 3.3039457459926016, "grad_norm": 0.09312507510185242, "learning_rate": 0.01, "loss": 2.0221, "step": 32154 }, { "epoch": 3.304254007398274, "grad_norm": 0.05357594043016434, "learning_rate": 0.01, "loss": 1.9986, "step": 32157 }, { "epoch": 3.304562268803946, "grad_norm": 0.07024755328893661, "learning_rate": 0.01, "loss": 2.0032, "step": 32160 }, { "epoch": 3.3048705302096177, "grad_norm": 0.0773010402917862, "learning_rate": 0.01, "loss": 2.0273, "step": 32163 }, { "epoch": 3.30517879161529, "grad_norm": 0.04807543754577637, "learning_rate": 0.01, "loss": 2.0101, "step": 32166 }, { "epoch": 3.3054870530209617, "grad_norm": 0.03652816265821457, "learning_rate": 0.01, "loss": 1.9997, "step": 32169 }, { "epoch": 3.305795314426634, "grad_norm": 0.06250651925802231, "learning_rate": 0.01, "loss": 2.0255, "step": 32172 }, { "epoch": 3.3061035758323056, "grad_norm": 0.08029799908399582, "learning_rate": 0.01, "loss": 2.0066, "step": 32175 }, { "epoch": 3.306411837237978, "grad_norm": 0.10732737928628922, "learning_rate": 0.01, "loss": 1.9864, "step": 32178 }, { "epoch": 3.30672009864365, "grad_norm": 0.07458434998989105, "learning_rate": 0.01, "loss": 2.0043, "step": 32181 }, { "epoch": 3.3070283600493218, "grad_norm": 0.0794452428817749, "learning_rate": 0.01, "loss": 2.0239, "step": 32184 }, { "epoch": 3.307336621454994, "grad_norm": 0.06960796564817429, "learning_rate": 0.01, "loss": 2.0533, "step": 32187 }, { "epoch": 3.3076448828606657, "grad_norm": 0.09482841193675995, "learning_rate": 0.01, "loss": 1.9819, "step": 32190 }, { "epoch": 3.307953144266338, "grad_norm": 0.08358022570610046, "learning_rate": 0.01, "loss": 1.9936, "step": 32193 }, { "epoch": 3.3082614056720097, "grad_norm": 0.03823438659310341, "learning_rate": 0.01, "loss": 1.9934, "step": 32196 }, { "epoch": 3.308569667077682, "grad_norm": 0.07413027435541153, "learning_rate": 0.01, "loss": 2.0097, "step": 32199 }, { "epoch": 3.308877928483354, "grad_norm": 0.03165535256266594, "learning_rate": 0.01, "loss": 2.0157, "step": 32202 }, { "epoch": 3.309186189889026, "grad_norm": 0.1173541322350502, "learning_rate": 0.01, "loss": 2.0229, "step": 32205 }, { "epoch": 3.309494451294698, "grad_norm": 0.07542740553617477, "learning_rate": 0.01, "loss": 1.9974, "step": 32208 }, { "epoch": 3.3098027127003697, "grad_norm": 0.104609914124012, "learning_rate": 0.01, "loss": 2.0312, "step": 32211 }, { "epoch": 3.310110974106042, "grad_norm": 0.0530179999768734, "learning_rate": 0.01, "loss": 1.9999, "step": 32214 }, { "epoch": 3.3104192355117137, "grad_norm": 0.070836141705513, "learning_rate": 0.01, "loss": 1.992, "step": 32217 }, { "epoch": 3.310727496917386, "grad_norm": 0.04056829586625099, "learning_rate": 0.01, "loss": 2.0202, "step": 32220 }, { "epoch": 3.311035758323058, "grad_norm": 0.10000187903642654, "learning_rate": 0.01, "loss": 2.0125, "step": 32223 }, { "epoch": 3.31134401972873, "grad_norm": 0.04653482139110565, "learning_rate": 0.01, "loss": 1.9988, "step": 32226 }, { "epoch": 3.311652281134402, "grad_norm": 0.10693076252937317, "learning_rate": 0.01, "loss": 1.9757, "step": 32229 }, { "epoch": 3.311960542540074, "grad_norm": 0.055378034710884094, "learning_rate": 0.01, "loss": 2.021, "step": 32232 }, { "epoch": 3.312268803945746, "grad_norm": 0.037242140620946884, "learning_rate": 0.01, "loss": 2.0367, "step": 32235 }, { "epoch": 3.312577065351418, "grad_norm": 0.04497090354561806, "learning_rate": 0.01, "loss": 1.997, "step": 32238 }, { "epoch": 3.31288532675709, "grad_norm": 0.048384904861450195, "learning_rate": 0.01, "loss": 1.9945, "step": 32241 }, { "epoch": 3.313193588162762, "grad_norm": 0.05588208884000778, "learning_rate": 0.01, "loss": 2.0375, "step": 32244 }, { "epoch": 3.313501849568434, "grad_norm": 0.05643755942583084, "learning_rate": 0.01, "loss": 2.0384, "step": 32247 }, { "epoch": 3.313810110974106, "grad_norm": 0.03415621444582939, "learning_rate": 0.01, "loss": 2.0217, "step": 32250 }, { "epoch": 3.3141183723797782, "grad_norm": 0.03537356108427048, "learning_rate": 0.01, "loss": 2.0046, "step": 32253 }, { "epoch": 3.31442663378545, "grad_norm": 0.1299961805343628, "learning_rate": 0.01, "loss": 1.9806, "step": 32256 }, { "epoch": 3.314734895191122, "grad_norm": 0.05058746412396431, "learning_rate": 0.01, "loss": 2.0076, "step": 32259 }, { "epoch": 3.315043156596794, "grad_norm": 0.06381676346063614, "learning_rate": 0.01, "loss": 2.0156, "step": 32262 }, { "epoch": 3.315351418002466, "grad_norm": 0.039552103728055954, "learning_rate": 0.01, "loss": 1.9853, "step": 32265 }, { "epoch": 3.315659679408138, "grad_norm": 0.03872091323137283, "learning_rate": 0.01, "loss": 2.0174, "step": 32268 }, { "epoch": 3.31596794081381, "grad_norm": 0.04546678811311722, "learning_rate": 0.01, "loss": 2.0176, "step": 32271 }, { "epoch": 3.3162762022194823, "grad_norm": 0.04541923850774765, "learning_rate": 0.01, "loss": 2.023, "step": 32274 }, { "epoch": 3.316584463625154, "grad_norm": 0.04341261461377144, "learning_rate": 0.01, "loss": 2.0179, "step": 32277 }, { "epoch": 3.316892725030826, "grad_norm": 0.10473504662513733, "learning_rate": 0.01, "loss": 2.0263, "step": 32280 }, { "epoch": 3.317200986436498, "grad_norm": 0.0668938085436821, "learning_rate": 0.01, "loss": 2.0417, "step": 32283 }, { "epoch": 3.31750924784217, "grad_norm": 0.12829263508319855, "learning_rate": 0.01, "loss": 2.022, "step": 32286 }, { "epoch": 3.3178175092478424, "grad_norm": 0.05443095043301582, "learning_rate": 0.01, "loss": 2.0101, "step": 32289 }, { "epoch": 3.318125770653514, "grad_norm": 0.06268401443958282, "learning_rate": 0.01, "loss": 2.0178, "step": 32292 }, { "epoch": 3.3184340320591863, "grad_norm": 0.05700231343507767, "learning_rate": 0.01, "loss": 2.0203, "step": 32295 }, { "epoch": 3.318742293464858, "grad_norm": 0.07467647641897202, "learning_rate": 0.01, "loss": 2.0031, "step": 32298 }, { "epoch": 3.3190505548705302, "grad_norm": 0.0675196647644043, "learning_rate": 0.01, "loss": 2.013, "step": 32301 }, { "epoch": 3.3193588162762024, "grad_norm": 0.044399552047252655, "learning_rate": 0.01, "loss": 1.993, "step": 32304 }, { "epoch": 3.319667077681874, "grad_norm": 0.0451500304043293, "learning_rate": 0.01, "loss": 2.0158, "step": 32307 }, { "epoch": 3.3199753390875464, "grad_norm": 0.041543807834386826, "learning_rate": 0.01, "loss": 2.0157, "step": 32310 }, { "epoch": 3.320283600493218, "grad_norm": 0.08818163722753525, "learning_rate": 0.01, "loss": 1.9928, "step": 32313 }, { "epoch": 3.3205918618988903, "grad_norm": 0.05897468701004982, "learning_rate": 0.01, "loss": 1.999, "step": 32316 }, { "epoch": 3.320900123304562, "grad_norm": 0.10485360026359558, "learning_rate": 0.01, "loss": 2.0178, "step": 32319 }, { "epoch": 3.3212083847102343, "grad_norm": 0.048387110233306885, "learning_rate": 0.01, "loss": 2.021, "step": 32322 }, { "epoch": 3.3215166461159065, "grad_norm": 0.07535526156425476, "learning_rate": 0.01, "loss": 2.0228, "step": 32325 }, { "epoch": 3.321824907521578, "grad_norm": 0.04594804719090462, "learning_rate": 0.01, "loss": 2.0271, "step": 32328 }, { "epoch": 3.3221331689272504, "grad_norm": 0.03757678344845772, "learning_rate": 0.01, "loss": 2.0177, "step": 32331 }, { "epoch": 3.322441430332922, "grad_norm": 0.046382974833250046, "learning_rate": 0.01, "loss": 2.021, "step": 32334 }, { "epoch": 3.3227496917385944, "grad_norm": 0.03983695060014725, "learning_rate": 0.01, "loss": 1.9877, "step": 32337 }, { "epoch": 3.323057953144266, "grad_norm": 0.0615588016808033, "learning_rate": 0.01, "loss": 2.0374, "step": 32340 }, { "epoch": 3.3233662145499383, "grad_norm": 0.10143701732158661, "learning_rate": 0.01, "loss": 2.0262, "step": 32343 }, { "epoch": 3.3236744759556105, "grad_norm": 0.06885727494955063, "learning_rate": 0.01, "loss": 2.0055, "step": 32346 }, { "epoch": 3.3239827373612822, "grad_norm": 0.10029948502779007, "learning_rate": 0.01, "loss": 2.0224, "step": 32349 }, { "epoch": 3.3242909987669544, "grad_norm": 0.08583710342645645, "learning_rate": 0.01, "loss": 1.988, "step": 32352 }, { "epoch": 3.3245992601726266, "grad_norm": 0.08596043288707733, "learning_rate": 0.01, "loss": 2.0065, "step": 32355 }, { "epoch": 3.3249075215782984, "grad_norm": 0.0431043840944767, "learning_rate": 0.01, "loss": 2.0107, "step": 32358 }, { "epoch": 3.3252157829839706, "grad_norm": 0.044695861637592316, "learning_rate": 0.01, "loss": 1.9898, "step": 32361 }, { "epoch": 3.3255240443896423, "grad_norm": 0.03210937976837158, "learning_rate": 0.01, "loss": 2.0058, "step": 32364 }, { "epoch": 3.3258323057953145, "grad_norm": 0.03838266804814339, "learning_rate": 0.01, "loss": 2.0145, "step": 32367 }, { "epoch": 3.3261405672009863, "grad_norm": 0.05611315369606018, "learning_rate": 0.01, "loss": 2.0045, "step": 32370 }, { "epoch": 3.3264488286066585, "grad_norm": 0.10025975853204727, "learning_rate": 0.01, "loss": 2.0047, "step": 32373 }, { "epoch": 3.3267570900123307, "grad_norm": 0.10004039853811264, "learning_rate": 0.01, "loss": 2.0171, "step": 32376 }, { "epoch": 3.3270653514180024, "grad_norm": 0.0655856728553772, "learning_rate": 0.01, "loss": 2.0079, "step": 32379 }, { "epoch": 3.3273736128236746, "grad_norm": 0.09608997404575348, "learning_rate": 0.01, "loss": 2.0069, "step": 32382 }, { "epoch": 3.3276818742293464, "grad_norm": 0.14821460843086243, "learning_rate": 0.01, "loss": 2.0107, "step": 32385 }, { "epoch": 3.3279901356350186, "grad_norm": 0.05276164412498474, "learning_rate": 0.01, "loss": 2.0369, "step": 32388 }, { "epoch": 3.3282983970406903, "grad_norm": 0.049289003014564514, "learning_rate": 0.01, "loss": 1.9977, "step": 32391 }, { "epoch": 3.3286066584463625, "grad_norm": 0.05115703493356705, "learning_rate": 0.01, "loss": 2.004, "step": 32394 }, { "epoch": 3.3289149198520347, "grad_norm": 0.04041285440325737, "learning_rate": 0.01, "loss": 2.0262, "step": 32397 }, { "epoch": 3.3292231812577064, "grad_norm": 0.04861520603299141, "learning_rate": 0.01, "loss": 1.9955, "step": 32400 }, { "epoch": 3.3295314426633786, "grad_norm": 0.1111973226070404, "learning_rate": 0.01, "loss": 2.0101, "step": 32403 }, { "epoch": 3.3298397040690504, "grad_norm": 0.061960045248270035, "learning_rate": 0.01, "loss": 1.9887, "step": 32406 }, { "epoch": 3.3301479654747226, "grad_norm": 0.1108783558011055, "learning_rate": 0.01, "loss": 2.0207, "step": 32409 }, { "epoch": 3.3304562268803943, "grad_norm": 0.06897444278001785, "learning_rate": 0.01, "loss": 2.0321, "step": 32412 }, { "epoch": 3.3307644882860665, "grad_norm": 0.046277862042188644, "learning_rate": 0.01, "loss": 2.0018, "step": 32415 }, { "epoch": 3.3310727496917387, "grad_norm": 0.09898782521486282, "learning_rate": 0.01, "loss": 2.0146, "step": 32418 }, { "epoch": 3.3313810110974105, "grad_norm": 0.059529174119234085, "learning_rate": 0.01, "loss": 1.9783, "step": 32421 }, { "epoch": 3.3316892725030827, "grad_norm": 0.08007462322711945, "learning_rate": 0.01, "loss": 2.0324, "step": 32424 }, { "epoch": 3.331997533908755, "grad_norm": 0.07130561769008636, "learning_rate": 0.01, "loss": 2.0061, "step": 32427 }, { "epoch": 3.3323057953144266, "grad_norm": 0.04967787116765976, "learning_rate": 0.01, "loss": 2.0259, "step": 32430 }, { "epoch": 3.332614056720099, "grad_norm": 0.08194708079099655, "learning_rate": 0.01, "loss": 2.0137, "step": 32433 }, { "epoch": 3.3329223181257706, "grad_norm": 0.056519269943237305, "learning_rate": 0.01, "loss": 1.9917, "step": 32436 }, { "epoch": 3.3332305795314427, "grad_norm": 0.08086001873016357, "learning_rate": 0.01, "loss": 2.021, "step": 32439 }, { "epoch": 3.3335388409371145, "grad_norm": 0.04036881402134895, "learning_rate": 0.01, "loss": 2.0205, "step": 32442 }, { "epoch": 3.3338471023427867, "grad_norm": 0.04373360425233841, "learning_rate": 0.01, "loss": 2.0214, "step": 32445 }, { "epoch": 3.334155363748459, "grad_norm": 0.04562424495816231, "learning_rate": 0.01, "loss": 2.0093, "step": 32448 }, { "epoch": 3.3344636251541306, "grad_norm": 0.2069234549999237, "learning_rate": 0.01, "loss": 1.9904, "step": 32451 }, { "epoch": 3.334771886559803, "grad_norm": 0.08092590421438217, "learning_rate": 0.01, "loss": 2.0097, "step": 32454 }, { "epoch": 3.3350801479654746, "grad_norm": 0.059557970613241196, "learning_rate": 0.01, "loss": 2.0523, "step": 32457 }, { "epoch": 3.335388409371147, "grad_norm": 0.039045874029397964, "learning_rate": 0.01, "loss": 1.9891, "step": 32460 }, { "epoch": 3.3356966707768185, "grad_norm": 0.03718112036585808, "learning_rate": 0.01, "loss": 2.0099, "step": 32463 }, { "epoch": 3.3360049321824907, "grad_norm": 0.05162828043103218, "learning_rate": 0.01, "loss": 2.0151, "step": 32466 }, { "epoch": 3.336313193588163, "grad_norm": 0.05825696140527725, "learning_rate": 0.01, "loss": 2.0252, "step": 32469 }, { "epoch": 3.3366214549938347, "grad_norm": 0.0458202064037323, "learning_rate": 0.01, "loss": 2.0273, "step": 32472 }, { "epoch": 3.336929716399507, "grad_norm": 0.07931084930896759, "learning_rate": 0.01, "loss": 2.0026, "step": 32475 }, { "epoch": 3.3372379778051786, "grad_norm": 0.05673946067690849, "learning_rate": 0.01, "loss": 1.998, "step": 32478 }, { "epoch": 3.337546239210851, "grad_norm": 0.05536726489663124, "learning_rate": 0.01, "loss": 1.9994, "step": 32481 }, { "epoch": 3.337854500616523, "grad_norm": 0.053581688553094864, "learning_rate": 0.01, "loss": 1.9913, "step": 32484 }, { "epoch": 3.3381627620221948, "grad_norm": 0.057449061423540115, "learning_rate": 0.01, "loss": 1.9973, "step": 32487 }, { "epoch": 3.338471023427867, "grad_norm": 0.0768120214343071, "learning_rate": 0.01, "loss": 2.0294, "step": 32490 }, { "epoch": 3.3387792848335387, "grad_norm": 0.1322673261165619, "learning_rate": 0.01, "loss": 1.9827, "step": 32493 }, { "epoch": 3.339087546239211, "grad_norm": 0.10641443729400635, "learning_rate": 0.01, "loss": 1.9858, "step": 32496 }, { "epoch": 3.339395807644883, "grad_norm": 0.13880647718906403, "learning_rate": 0.01, "loss": 1.979, "step": 32499 }, { "epoch": 3.339704069050555, "grad_norm": 0.04653307795524597, "learning_rate": 0.01, "loss": 2.0206, "step": 32502 }, { "epoch": 3.340012330456227, "grad_norm": 0.05031618848443031, "learning_rate": 0.01, "loss": 2.0221, "step": 32505 }, { "epoch": 3.340320591861899, "grad_norm": 0.042637672275304794, "learning_rate": 0.01, "loss": 1.9986, "step": 32508 }, { "epoch": 3.340628853267571, "grad_norm": 0.03557129204273224, "learning_rate": 0.01, "loss": 2.0186, "step": 32511 }, { "epoch": 3.3409371146732427, "grad_norm": 0.04271169379353523, "learning_rate": 0.01, "loss": 1.9907, "step": 32514 }, { "epoch": 3.341245376078915, "grad_norm": 0.06750103831291199, "learning_rate": 0.01, "loss": 2.0013, "step": 32517 }, { "epoch": 3.341553637484587, "grad_norm": 0.05727256461977959, "learning_rate": 0.01, "loss": 2.0192, "step": 32520 }, { "epoch": 3.341861898890259, "grad_norm": 0.05737854540348053, "learning_rate": 0.01, "loss": 2.0194, "step": 32523 }, { "epoch": 3.342170160295931, "grad_norm": 0.04525689780712128, "learning_rate": 0.01, "loss": 1.9733, "step": 32526 }, { "epoch": 3.342478421701603, "grad_norm": 0.038834329694509506, "learning_rate": 0.01, "loss": 2.0287, "step": 32529 }, { "epoch": 3.342786683107275, "grad_norm": 0.14812909066677094, "learning_rate": 0.01, "loss": 1.9926, "step": 32532 }, { "epoch": 3.3430949445129468, "grad_norm": 0.09346423298120499, "learning_rate": 0.01, "loss": 1.9866, "step": 32535 }, { "epoch": 3.343403205918619, "grad_norm": 0.03221321851015091, "learning_rate": 0.01, "loss": 2.0114, "step": 32538 }, { "epoch": 3.343711467324291, "grad_norm": 0.05457564815878868, "learning_rate": 0.01, "loss": 2.0044, "step": 32541 }, { "epoch": 3.344019728729963, "grad_norm": 0.05203322321176529, "learning_rate": 0.01, "loss": 1.9944, "step": 32544 }, { "epoch": 3.344327990135635, "grad_norm": 0.05318872258067131, "learning_rate": 0.01, "loss": 2.0199, "step": 32547 }, { "epoch": 3.344636251541307, "grad_norm": 0.034635335206985474, "learning_rate": 0.01, "loss": 1.9841, "step": 32550 }, { "epoch": 3.344944512946979, "grad_norm": 0.0421120747923851, "learning_rate": 0.01, "loss": 1.9923, "step": 32553 }, { "epoch": 3.3452527743526512, "grad_norm": 0.08523180335760117, "learning_rate": 0.01, "loss": 2.0162, "step": 32556 }, { "epoch": 3.345561035758323, "grad_norm": 0.11694061011075974, "learning_rate": 0.01, "loss": 1.9921, "step": 32559 }, { "epoch": 3.345869297163995, "grad_norm": 0.05000199005007744, "learning_rate": 0.01, "loss": 2.0159, "step": 32562 }, { "epoch": 3.346177558569667, "grad_norm": 0.0399484746158123, "learning_rate": 0.01, "loss": 2.0316, "step": 32565 }, { "epoch": 3.346485819975339, "grad_norm": 0.0491316057741642, "learning_rate": 0.01, "loss": 2.0184, "step": 32568 }, { "epoch": 3.3467940813810113, "grad_norm": 0.042924270033836365, "learning_rate": 0.01, "loss": 1.9983, "step": 32571 }, { "epoch": 3.347102342786683, "grad_norm": 0.03486446663737297, "learning_rate": 0.01, "loss": 2.023, "step": 32574 }, { "epoch": 3.3474106041923553, "grad_norm": 0.031064294278621674, "learning_rate": 0.01, "loss": 2.0249, "step": 32577 }, { "epoch": 3.347718865598027, "grad_norm": 0.05951589718461037, "learning_rate": 0.01, "loss": 1.9989, "step": 32580 }, { "epoch": 3.348027127003699, "grad_norm": 0.04387381300330162, "learning_rate": 0.01, "loss": 2.0158, "step": 32583 }, { "epoch": 3.348335388409371, "grad_norm": 0.05328337103128433, "learning_rate": 0.01, "loss": 1.9941, "step": 32586 }, { "epoch": 3.348643649815043, "grad_norm": 0.04561325162649155, "learning_rate": 0.01, "loss": 2.0039, "step": 32589 }, { "epoch": 3.3489519112207153, "grad_norm": 0.047260623425245285, "learning_rate": 0.01, "loss": 2.0387, "step": 32592 }, { "epoch": 3.349260172626387, "grad_norm": 0.21082252264022827, "learning_rate": 0.01, "loss": 2.024, "step": 32595 }, { "epoch": 3.3495684340320593, "grad_norm": 0.08391027897596359, "learning_rate": 0.01, "loss": 1.9919, "step": 32598 }, { "epoch": 3.349876695437731, "grad_norm": 0.07036472856998444, "learning_rate": 0.01, "loss": 2.0218, "step": 32601 }, { "epoch": 3.3501849568434032, "grad_norm": 0.03812922164797783, "learning_rate": 0.01, "loss": 2.014, "step": 32604 }, { "epoch": 3.350493218249075, "grad_norm": 0.041235774755477905, "learning_rate": 0.01, "loss": 2.0135, "step": 32607 }, { "epoch": 3.350801479654747, "grad_norm": 0.05174950137734413, "learning_rate": 0.01, "loss": 2.026, "step": 32610 }, { "epoch": 3.3511097410604194, "grad_norm": 0.058260124176740646, "learning_rate": 0.01, "loss": 2.0177, "step": 32613 }, { "epoch": 3.351418002466091, "grad_norm": 0.045651067048311234, "learning_rate": 0.01, "loss": 2.0199, "step": 32616 }, { "epoch": 3.3517262638717633, "grad_norm": 0.043610829859972, "learning_rate": 0.01, "loss": 2.0253, "step": 32619 }, { "epoch": 3.3520345252774355, "grad_norm": 0.04924603924155235, "learning_rate": 0.01, "loss": 2.0141, "step": 32622 }, { "epoch": 3.3523427866831073, "grad_norm": 0.04765019193291664, "learning_rate": 0.01, "loss": 2.0398, "step": 32625 }, { "epoch": 3.3526510480887795, "grad_norm": 0.04744412377476692, "learning_rate": 0.01, "loss": 2.0159, "step": 32628 }, { "epoch": 3.352959309494451, "grad_norm": 0.09456950426101685, "learning_rate": 0.01, "loss": 2.0084, "step": 32631 }, { "epoch": 3.3532675709001234, "grad_norm": 0.17356513440608978, "learning_rate": 0.01, "loss": 2.0138, "step": 32634 }, { "epoch": 3.353575832305795, "grad_norm": 0.08420834690332413, "learning_rate": 0.01, "loss": 2.0004, "step": 32637 }, { "epoch": 3.3538840937114673, "grad_norm": 0.09453277289867401, "learning_rate": 0.01, "loss": 2.021, "step": 32640 }, { "epoch": 3.3541923551171395, "grad_norm": 0.05444180220365524, "learning_rate": 0.01, "loss": 2.0335, "step": 32643 }, { "epoch": 3.3545006165228113, "grad_norm": 0.04824339225888252, "learning_rate": 0.01, "loss": 1.9976, "step": 32646 }, { "epoch": 3.3548088779284835, "grad_norm": 0.06650727242231369, "learning_rate": 0.01, "loss": 2.0021, "step": 32649 }, { "epoch": 3.3551171393341552, "grad_norm": 0.05119656026363373, "learning_rate": 0.01, "loss": 2.0415, "step": 32652 }, { "epoch": 3.3554254007398274, "grad_norm": 0.044617343693971634, "learning_rate": 0.01, "loss": 1.9992, "step": 32655 }, { "epoch": 3.355733662145499, "grad_norm": 0.035579435527324677, "learning_rate": 0.01, "loss": 1.9993, "step": 32658 }, { "epoch": 3.3560419235511714, "grad_norm": 0.05802566558122635, "learning_rate": 0.01, "loss": 2.0112, "step": 32661 }, { "epoch": 3.3563501849568436, "grad_norm": 0.050934724509716034, "learning_rate": 0.01, "loss": 2.0039, "step": 32664 }, { "epoch": 3.3566584463625153, "grad_norm": 0.055400047451257706, "learning_rate": 0.01, "loss": 2.0179, "step": 32667 }, { "epoch": 3.3569667077681875, "grad_norm": 0.1315484195947647, "learning_rate": 0.01, "loss": 1.9883, "step": 32670 }, { "epoch": 3.3572749691738593, "grad_norm": 0.13136912882328033, "learning_rate": 0.01, "loss": 2.0123, "step": 32673 }, { "epoch": 3.3575832305795315, "grad_norm": 0.08574076741933823, "learning_rate": 0.01, "loss": 1.9902, "step": 32676 }, { "epoch": 3.357891491985203, "grad_norm": 0.04678389057517052, "learning_rate": 0.01, "loss": 2.0363, "step": 32679 }, { "epoch": 3.3581997533908754, "grad_norm": 0.03356343135237694, "learning_rate": 0.01, "loss": 2.0031, "step": 32682 }, { "epoch": 3.3585080147965476, "grad_norm": 0.046139661222696304, "learning_rate": 0.01, "loss": 2.0255, "step": 32685 }, { "epoch": 3.3588162762022193, "grad_norm": 0.03130761533975601, "learning_rate": 0.01, "loss": 2.0107, "step": 32688 }, { "epoch": 3.3591245376078915, "grad_norm": 0.17764367163181305, "learning_rate": 0.01, "loss": 1.9817, "step": 32691 }, { "epoch": 3.3594327990135637, "grad_norm": 0.04135056957602501, "learning_rate": 0.01, "loss": 2.0032, "step": 32694 }, { "epoch": 3.3597410604192355, "grad_norm": 0.11181548237800598, "learning_rate": 0.01, "loss": 2.048, "step": 32697 }, { "epoch": 3.3600493218249077, "grad_norm": 0.07631994783878326, "learning_rate": 0.01, "loss": 2.0222, "step": 32700 }, { "epoch": 3.3603575832305794, "grad_norm": 0.03839050978422165, "learning_rate": 0.01, "loss": 1.9715, "step": 32703 }, { "epoch": 3.3606658446362516, "grad_norm": 0.03893091529607773, "learning_rate": 0.01, "loss": 2.0204, "step": 32706 }, { "epoch": 3.3609741060419234, "grad_norm": 0.15776588022708893, "learning_rate": 0.01, "loss": 1.9747, "step": 32709 }, { "epoch": 3.3612823674475956, "grad_norm": 0.125548854470253, "learning_rate": 0.01, "loss": 2.0066, "step": 32712 }, { "epoch": 3.3615906288532678, "grad_norm": 0.06952936947345734, "learning_rate": 0.01, "loss": 2.0353, "step": 32715 }, { "epoch": 3.3618988902589395, "grad_norm": 0.03826635703444481, "learning_rate": 0.01, "loss": 2.0186, "step": 32718 }, { "epoch": 3.3622071516646117, "grad_norm": 0.03977655619382858, "learning_rate": 0.01, "loss": 2.0205, "step": 32721 }, { "epoch": 3.3625154130702835, "grad_norm": 0.02882550098001957, "learning_rate": 0.01, "loss": 1.9914, "step": 32724 }, { "epoch": 3.3628236744759556, "grad_norm": 0.03502441197633743, "learning_rate": 0.01, "loss": 1.9925, "step": 32727 }, { "epoch": 3.3631319358816274, "grad_norm": 0.04370797425508499, "learning_rate": 0.01, "loss": 2.0154, "step": 32730 }, { "epoch": 3.3634401972872996, "grad_norm": 0.03528802841901779, "learning_rate": 0.01, "loss": 2.0014, "step": 32733 }, { "epoch": 3.363748458692972, "grad_norm": 0.08671889454126358, "learning_rate": 0.01, "loss": 2.0048, "step": 32736 }, { "epoch": 3.3640567200986435, "grad_norm": 0.1123836413025856, "learning_rate": 0.01, "loss": 2.0119, "step": 32739 }, { "epoch": 3.3643649815043157, "grad_norm": 0.061064526438713074, "learning_rate": 0.01, "loss": 2.0054, "step": 32742 }, { "epoch": 3.3646732429099875, "grad_norm": 0.05037948489189148, "learning_rate": 0.01, "loss": 2.0282, "step": 32745 }, { "epoch": 3.3649815043156597, "grad_norm": 0.052206844091415405, "learning_rate": 0.01, "loss": 2.0226, "step": 32748 }, { "epoch": 3.365289765721332, "grad_norm": 0.05795833095908165, "learning_rate": 0.01, "loss": 2.0038, "step": 32751 }, { "epoch": 3.3655980271270036, "grad_norm": 0.030604414641857147, "learning_rate": 0.01, "loss": 2.0104, "step": 32754 }, { "epoch": 3.365906288532676, "grad_norm": 0.05441366508603096, "learning_rate": 0.01, "loss": 1.9975, "step": 32757 }, { "epoch": 3.3662145499383476, "grad_norm": 0.07509131729602814, "learning_rate": 0.01, "loss": 2.0117, "step": 32760 }, { "epoch": 3.3665228113440198, "grad_norm": 0.046888504177331924, "learning_rate": 0.01, "loss": 2.0158, "step": 32763 }, { "epoch": 3.366831072749692, "grad_norm": 0.05030560493469238, "learning_rate": 0.01, "loss": 2.0139, "step": 32766 }, { "epoch": 3.3671393341553637, "grad_norm": 0.0426168255507946, "learning_rate": 0.01, "loss": 2.0118, "step": 32769 }, { "epoch": 3.367447595561036, "grad_norm": 0.10896468907594681, "learning_rate": 0.01, "loss": 1.9939, "step": 32772 }, { "epoch": 3.3677558569667077, "grad_norm": 0.11696910113096237, "learning_rate": 0.01, "loss": 2.0353, "step": 32775 }, { "epoch": 3.36806411837238, "grad_norm": 0.07340724021196365, "learning_rate": 0.01, "loss": 2.0062, "step": 32778 }, { "epoch": 3.3683723797780516, "grad_norm": 0.037968121469020844, "learning_rate": 0.01, "loss": 2.0191, "step": 32781 }, { "epoch": 3.368680641183724, "grad_norm": 0.044434670358896255, "learning_rate": 0.01, "loss": 1.9993, "step": 32784 }, { "epoch": 3.368988902589396, "grad_norm": 0.03823886066675186, "learning_rate": 0.01, "loss": 2.0402, "step": 32787 }, { "epoch": 3.3692971639950677, "grad_norm": 0.06556801497936249, "learning_rate": 0.01, "loss": 2.0097, "step": 32790 }, { "epoch": 3.36960542540074, "grad_norm": 0.06128913164138794, "learning_rate": 0.01, "loss": 2.032, "step": 32793 }, { "epoch": 3.3699136868064117, "grad_norm": 0.08499012142419815, "learning_rate": 0.01, "loss": 2.0395, "step": 32796 }, { "epoch": 3.370221948212084, "grad_norm": 0.03410051763057709, "learning_rate": 0.01, "loss": 2.0055, "step": 32799 }, { "epoch": 3.3705302096177556, "grad_norm": 0.08818015456199646, "learning_rate": 0.01, "loss": 2.0034, "step": 32802 }, { "epoch": 3.370838471023428, "grad_norm": 0.045091863721609116, "learning_rate": 0.01, "loss": 2.0252, "step": 32805 }, { "epoch": 3.3711467324291, "grad_norm": 0.10982260853052139, "learning_rate": 0.01, "loss": 1.9912, "step": 32808 }, { "epoch": 3.3714549938347718, "grad_norm": 0.04633982852101326, "learning_rate": 0.01, "loss": 2.0256, "step": 32811 }, { "epoch": 3.371763255240444, "grad_norm": 0.04701898992061615, "learning_rate": 0.01, "loss": 2.0098, "step": 32814 }, { "epoch": 3.372071516646116, "grad_norm": 0.03449505567550659, "learning_rate": 0.01, "loss": 2.0046, "step": 32817 }, { "epoch": 3.372379778051788, "grad_norm": 0.03621023893356323, "learning_rate": 0.01, "loss": 1.9677, "step": 32820 }, { "epoch": 3.37268803945746, "grad_norm": 0.04743462800979614, "learning_rate": 0.01, "loss": 2.0308, "step": 32823 }, { "epoch": 3.372996300863132, "grad_norm": 0.04240218549966812, "learning_rate": 0.01, "loss": 2.0152, "step": 32826 }, { "epoch": 3.373304562268804, "grad_norm": 0.09400332719087601, "learning_rate": 0.01, "loss": 2.0098, "step": 32829 }, { "epoch": 3.373612823674476, "grad_norm": 0.07313279062509537, "learning_rate": 0.01, "loss": 2.0366, "step": 32832 }, { "epoch": 3.373921085080148, "grad_norm": 0.07604516297578812, "learning_rate": 0.01, "loss": 2.0013, "step": 32835 }, { "epoch": 3.37422934648582, "grad_norm": 0.044236812740564346, "learning_rate": 0.01, "loss": 2.0005, "step": 32838 }, { "epoch": 3.374537607891492, "grad_norm": 0.051601652055978775, "learning_rate": 0.01, "loss": 1.9981, "step": 32841 }, { "epoch": 3.374845869297164, "grad_norm": 0.10818912088871002, "learning_rate": 0.01, "loss": 2.001, "step": 32844 }, { "epoch": 3.375154130702836, "grad_norm": 0.04563935101032257, "learning_rate": 0.01, "loss": 2.0138, "step": 32847 }, { "epoch": 3.375462392108508, "grad_norm": 0.053665511310100555, "learning_rate": 0.01, "loss": 2.0193, "step": 32850 }, { "epoch": 3.37577065351418, "grad_norm": 0.08934652805328369, "learning_rate": 0.01, "loss": 2.0243, "step": 32853 }, { "epoch": 3.376078914919852, "grad_norm": 0.0752192884683609, "learning_rate": 0.01, "loss": 2.0429, "step": 32856 }, { "epoch": 3.376387176325524, "grad_norm": 0.05763142928481102, "learning_rate": 0.01, "loss": 2.0022, "step": 32859 }, { "epoch": 3.376695437731196, "grad_norm": 0.03926476463675499, "learning_rate": 0.01, "loss": 2.0132, "step": 32862 }, { "epoch": 3.377003699136868, "grad_norm": 0.05735384672880173, "learning_rate": 0.01, "loss": 2.0144, "step": 32865 }, { "epoch": 3.37731196054254, "grad_norm": 0.18665596842765808, "learning_rate": 0.01, "loss": 2.0028, "step": 32868 }, { "epoch": 3.377620221948212, "grad_norm": 0.06702150404453278, "learning_rate": 0.01, "loss": 2.0254, "step": 32871 }, { "epoch": 3.377928483353884, "grad_norm": 0.051258910447359085, "learning_rate": 0.01, "loss": 2.0135, "step": 32874 }, { "epoch": 3.378236744759556, "grad_norm": 0.05804390087723732, "learning_rate": 0.01, "loss": 2.0006, "step": 32877 }, { "epoch": 3.3785450061652282, "grad_norm": 0.04688677936792374, "learning_rate": 0.01, "loss": 2.0244, "step": 32880 }, { "epoch": 3.3788532675709, "grad_norm": 0.057768989354372025, "learning_rate": 0.01, "loss": 2.0082, "step": 32883 }, { "epoch": 3.379161528976572, "grad_norm": 0.05571329593658447, "learning_rate": 0.01, "loss": 2.0364, "step": 32886 }, { "epoch": 3.3794697903822444, "grad_norm": 0.04497957229614258, "learning_rate": 0.01, "loss": 1.9896, "step": 32889 }, { "epoch": 3.379778051787916, "grad_norm": 0.043453045189380646, "learning_rate": 0.01, "loss": 2.0196, "step": 32892 }, { "epoch": 3.3800863131935883, "grad_norm": 0.045709915459156036, "learning_rate": 0.01, "loss": 1.9904, "step": 32895 }, { "epoch": 3.38039457459926, "grad_norm": 0.05974254012107849, "learning_rate": 0.01, "loss": 2.0022, "step": 32898 }, { "epoch": 3.3807028360049323, "grad_norm": 0.16592206060886383, "learning_rate": 0.01, "loss": 2.0035, "step": 32901 }, { "epoch": 3.381011097410604, "grad_norm": 0.04671747609972954, "learning_rate": 0.01, "loss": 1.9948, "step": 32904 }, { "epoch": 3.381319358816276, "grad_norm": 0.07180918008089066, "learning_rate": 0.01, "loss": 2.0097, "step": 32907 }, { "epoch": 3.3816276202219484, "grad_norm": 0.06775587797164917, "learning_rate": 0.01, "loss": 2.0101, "step": 32910 }, { "epoch": 3.38193588162762, "grad_norm": 0.04381205141544342, "learning_rate": 0.01, "loss": 2.0053, "step": 32913 }, { "epoch": 3.3822441430332923, "grad_norm": 0.0345197468996048, "learning_rate": 0.01, "loss": 1.9829, "step": 32916 }, { "epoch": 3.382552404438964, "grad_norm": 0.04965033382177353, "learning_rate": 0.01, "loss": 2.0111, "step": 32919 }, { "epoch": 3.3828606658446363, "grad_norm": 0.20276527106761932, "learning_rate": 0.01, "loss": 2.0342, "step": 32922 }, { "epoch": 3.383168927250308, "grad_norm": 0.0702800378203392, "learning_rate": 0.01, "loss": 1.9971, "step": 32925 }, { "epoch": 3.3834771886559802, "grad_norm": 0.05775219202041626, "learning_rate": 0.01, "loss": 2.0062, "step": 32928 }, { "epoch": 3.3837854500616524, "grad_norm": 0.0662347599864006, "learning_rate": 0.01, "loss": 1.9962, "step": 32931 }, { "epoch": 3.384093711467324, "grad_norm": 0.05067736655473709, "learning_rate": 0.01, "loss": 1.9963, "step": 32934 }, { "epoch": 3.3844019728729964, "grad_norm": 0.057027652859687805, "learning_rate": 0.01, "loss": 2.0261, "step": 32937 }, { "epoch": 3.384710234278668, "grad_norm": 0.0408274307847023, "learning_rate": 0.01, "loss": 2.0139, "step": 32940 }, { "epoch": 3.3850184956843403, "grad_norm": 0.049467723816633224, "learning_rate": 0.01, "loss": 2.0133, "step": 32943 }, { "epoch": 3.3853267570900125, "grad_norm": 0.07573775947093964, "learning_rate": 0.01, "loss": 2.0331, "step": 32946 }, { "epoch": 3.3856350184956843, "grad_norm": 0.04027678817510605, "learning_rate": 0.01, "loss": 2.0118, "step": 32949 }, { "epoch": 3.3859432799013565, "grad_norm": 0.09980335086584091, "learning_rate": 0.01, "loss": 2.0158, "step": 32952 }, { "epoch": 3.386251541307028, "grad_norm": 0.06230602413415909, "learning_rate": 0.01, "loss": 2.0235, "step": 32955 }, { "epoch": 3.3865598027127004, "grad_norm": 0.09655454754829407, "learning_rate": 0.01, "loss": 2.0003, "step": 32958 }, { "epoch": 3.3868680641183726, "grad_norm": 0.053587790578603745, "learning_rate": 0.01, "loss": 2.002, "step": 32961 }, { "epoch": 3.3871763255240444, "grad_norm": 0.08395679295063019, "learning_rate": 0.01, "loss": 1.9928, "step": 32964 }, { "epoch": 3.3874845869297165, "grad_norm": 0.08353892713785172, "learning_rate": 0.01, "loss": 2.0052, "step": 32967 }, { "epoch": 3.3877928483353883, "grad_norm": 0.11298651993274689, "learning_rate": 0.01, "loss": 2.0039, "step": 32970 }, { "epoch": 3.3881011097410605, "grad_norm": 0.08317071199417114, "learning_rate": 0.01, "loss": 2.0095, "step": 32973 }, { "epoch": 3.3884093711467322, "grad_norm": 0.07725278288125992, "learning_rate": 0.01, "loss": 2.0255, "step": 32976 }, { "epoch": 3.3887176325524044, "grad_norm": 0.06264784932136536, "learning_rate": 0.01, "loss": 2.017, "step": 32979 }, { "epoch": 3.3890258939580766, "grad_norm": 0.0588025264441967, "learning_rate": 0.01, "loss": 2.0138, "step": 32982 }, { "epoch": 3.3893341553637484, "grad_norm": 0.033383727073669434, "learning_rate": 0.01, "loss": 2.0105, "step": 32985 }, { "epoch": 3.3896424167694206, "grad_norm": 0.04963357746601105, "learning_rate": 0.01, "loss": 2.0332, "step": 32988 }, { "epoch": 3.3899506781750923, "grad_norm": 0.03166192024946213, "learning_rate": 0.01, "loss": 1.9884, "step": 32991 }, { "epoch": 3.3902589395807645, "grad_norm": 0.0424019880592823, "learning_rate": 0.01, "loss": 1.9727, "step": 32994 }, { "epoch": 3.3905672009864363, "grad_norm": 0.0549466572701931, "learning_rate": 0.01, "loss": 1.9858, "step": 32997 }, { "epoch": 3.3908754623921085, "grad_norm": 0.06859169900417328, "learning_rate": 0.01, "loss": 1.9981, "step": 33000 }, { "epoch": 3.3911837237977807, "grad_norm": 0.05035685375332832, "learning_rate": 0.01, "loss": 2.0422, "step": 33003 }, { "epoch": 3.3914919852034524, "grad_norm": 0.10227832943201065, "learning_rate": 0.01, "loss": 1.9835, "step": 33006 }, { "epoch": 3.3918002466091246, "grad_norm": 0.052029043436050415, "learning_rate": 0.01, "loss": 2.0341, "step": 33009 }, { "epoch": 3.392108508014797, "grad_norm": 0.03569505736231804, "learning_rate": 0.01, "loss": 2.0065, "step": 33012 }, { "epoch": 3.3924167694204685, "grad_norm": 0.05492673069238663, "learning_rate": 0.01, "loss": 1.9839, "step": 33015 }, { "epoch": 3.3927250308261407, "grad_norm": 0.10698610544204712, "learning_rate": 0.01, "loss": 1.98, "step": 33018 }, { "epoch": 3.3930332922318125, "grad_norm": 0.051218993961811066, "learning_rate": 0.01, "loss": 1.9778, "step": 33021 }, { "epoch": 3.3933415536374847, "grad_norm": 0.10021807998418808, "learning_rate": 0.01, "loss": 1.997, "step": 33024 }, { "epoch": 3.3936498150431564, "grad_norm": 0.043556150048971176, "learning_rate": 0.01, "loss": 2.0145, "step": 33027 }, { "epoch": 3.3939580764488286, "grad_norm": 0.052555110305547714, "learning_rate": 0.01, "loss": 2.0, "step": 33030 }, { "epoch": 3.394266337854501, "grad_norm": 0.1656499058008194, "learning_rate": 0.01, "loss": 1.9747, "step": 33033 }, { "epoch": 3.3945745992601726, "grad_norm": 0.08107822388410568, "learning_rate": 0.01, "loss": 1.9894, "step": 33036 }, { "epoch": 3.3948828606658448, "grad_norm": 0.05703389272093773, "learning_rate": 0.01, "loss": 1.9935, "step": 33039 }, { "epoch": 3.3951911220715165, "grad_norm": 0.060602329671382904, "learning_rate": 0.01, "loss": 2.01, "step": 33042 }, { "epoch": 3.3954993834771887, "grad_norm": 0.05680840089917183, "learning_rate": 0.01, "loss": 2.0178, "step": 33045 }, { "epoch": 3.3958076448828605, "grad_norm": 0.053718626499176025, "learning_rate": 0.01, "loss": 2.0021, "step": 33048 }, { "epoch": 3.3961159062885327, "grad_norm": 0.04885102063417435, "learning_rate": 0.01, "loss": 2.0215, "step": 33051 }, { "epoch": 3.396424167694205, "grad_norm": 0.046444397419691086, "learning_rate": 0.01, "loss": 2.0117, "step": 33054 }, { "epoch": 3.3967324290998766, "grad_norm": 0.03031921572983265, "learning_rate": 0.01, "loss": 1.9973, "step": 33057 }, { "epoch": 3.397040690505549, "grad_norm": 0.048908550292253494, "learning_rate": 0.01, "loss": 2.0008, "step": 33060 }, { "epoch": 3.3973489519112205, "grad_norm": 0.06003925949335098, "learning_rate": 0.01, "loss": 1.9769, "step": 33063 }, { "epoch": 3.3976572133168927, "grad_norm": 0.10929730534553528, "learning_rate": 0.01, "loss": 1.998, "step": 33066 }, { "epoch": 3.3979654747225645, "grad_norm": 0.09032581746578217, "learning_rate": 0.01, "loss": 2.0331, "step": 33069 }, { "epoch": 3.3982737361282367, "grad_norm": 0.043940551578998566, "learning_rate": 0.01, "loss": 2.0076, "step": 33072 }, { "epoch": 3.398581997533909, "grad_norm": 0.10148674994707108, "learning_rate": 0.01, "loss": 1.9994, "step": 33075 }, { "epoch": 3.3988902589395806, "grad_norm": 0.04877715930342674, "learning_rate": 0.01, "loss": 2.0257, "step": 33078 }, { "epoch": 3.399198520345253, "grad_norm": 0.10485149919986725, "learning_rate": 0.01, "loss": 2.0297, "step": 33081 }, { "epoch": 3.399506781750925, "grad_norm": 0.12222550064325333, "learning_rate": 0.01, "loss": 2.0118, "step": 33084 }, { "epoch": 3.3998150431565968, "grad_norm": 0.0744134709239006, "learning_rate": 0.01, "loss": 2.0157, "step": 33087 }, { "epoch": 3.400123304562269, "grad_norm": 0.08050314337015152, "learning_rate": 0.01, "loss": 1.9932, "step": 33090 }, { "epoch": 3.4004315659679407, "grad_norm": 0.0806950256228447, "learning_rate": 0.01, "loss": 1.997, "step": 33093 }, { "epoch": 3.400739827373613, "grad_norm": 0.06369089335203171, "learning_rate": 0.01, "loss": 2.0101, "step": 33096 }, { "epoch": 3.4010480887792847, "grad_norm": 0.06041014939546585, "learning_rate": 0.01, "loss": 2.0408, "step": 33099 }, { "epoch": 3.401356350184957, "grad_norm": 0.0476149246096611, "learning_rate": 0.01, "loss": 1.988, "step": 33102 }, { "epoch": 3.401664611590629, "grad_norm": 0.05710010603070259, "learning_rate": 0.01, "loss": 2.0152, "step": 33105 }, { "epoch": 3.401972872996301, "grad_norm": 0.04351675137877464, "learning_rate": 0.01, "loss": 1.9752, "step": 33108 }, { "epoch": 3.402281134401973, "grad_norm": 0.04341613128781319, "learning_rate": 0.01, "loss": 2.0149, "step": 33111 }, { "epoch": 3.4025893958076447, "grad_norm": 0.04619600623846054, "learning_rate": 0.01, "loss": 2.0107, "step": 33114 }, { "epoch": 3.402897657213317, "grad_norm": 0.07674260437488556, "learning_rate": 0.01, "loss": 2.0243, "step": 33117 }, { "epoch": 3.4032059186189887, "grad_norm": 0.03869005665183067, "learning_rate": 0.01, "loss": 1.9774, "step": 33120 }, { "epoch": 3.403514180024661, "grad_norm": 0.08710911124944687, "learning_rate": 0.01, "loss": 1.995, "step": 33123 }, { "epoch": 3.403822441430333, "grad_norm": 0.09343576431274414, "learning_rate": 0.01, "loss": 1.9881, "step": 33126 }, { "epoch": 3.404130702836005, "grad_norm": 0.048723649233579636, "learning_rate": 0.01, "loss": 2.0273, "step": 33129 }, { "epoch": 3.404438964241677, "grad_norm": 0.07655228674411774, "learning_rate": 0.01, "loss": 2.0235, "step": 33132 }, { "epoch": 3.4047472256473488, "grad_norm": 0.08873139321804047, "learning_rate": 0.01, "loss": 1.9975, "step": 33135 }, { "epoch": 3.405055487053021, "grad_norm": 0.06514773517847061, "learning_rate": 0.01, "loss": 2.0167, "step": 33138 }, { "epoch": 3.405363748458693, "grad_norm": 0.06446196138858795, "learning_rate": 0.01, "loss": 2.0051, "step": 33141 }, { "epoch": 3.405672009864365, "grad_norm": 0.12146038562059402, "learning_rate": 0.01, "loss": 2.0171, "step": 33144 }, { "epoch": 3.405980271270037, "grad_norm": 0.06639200448989868, "learning_rate": 0.01, "loss": 2.0049, "step": 33147 }, { "epoch": 3.406288532675709, "grad_norm": 0.05644892901182175, "learning_rate": 0.01, "loss": 2.0172, "step": 33150 }, { "epoch": 3.406596794081381, "grad_norm": 0.037636831402778625, "learning_rate": 0.01, "loss": 2.008, "step": 33153 }, { "epoch": 3.4069050554870532, "grad_norm": 0.031332679092884064, "learning_rate": 0.01, "loss": 1.9821, "step": 33156 }, { "epoch": 3.407213316892725, "grad_norm": 0.051785338670015335, "learning_rate": 0.01, "loss": 2.011, "step": 33159 }, { "epoch": 3.407521578298397, "grad_norm": 0.07761172205209732, "learning_rate": 0.01, "loss": 2.0277, "step": 33162 }, { "epoch": 3.407829839704069, "grad_norm": 0.09536123275756836, "learning_rate": 0.01, "loss": 2.0222, "step": 33165 }, { "epoch": 3.408138101109741, "grad_norm": 0.04122615605592728, "learning_rate": 0.01, "loss": 2.0065, "step": 33168 }, { "epoch": 3.408446362515413, "grad_norm": 0.0360184982419014, "learning_rate": 0.01, "loss": 2.002, "step": 33171 }, { "epoch": 3.408754623921085, "grad_norm": 0.03222360834479332, "learning_rate": 0.01, "loss": 2.0083, "step": 33174 }, { "epoch": 3.4090628853267573, "grad_norm": 0.043042391538619995, "learning_rate": 0.01, "loss": 1.989, "step": 33177 }, { "epoch": 3.409371146732429, "grad_norm": 0.06593729555606842, "learning_rate": 0.01, "loss": 2.0115, "step": 33180 }, { "epoch": 3.409679408138101, "grad_norm": 0.09074060618877411, "learning_rate": 0.01, "loss": 2.0168, "step": 33183 }, { "epoch": 3.409987669543773, "grad_norm": 0.06270799785852432, "learning_rate": 0.01, "loss": 1.9973, "step": 33186 }, { "epoch": 3.410295930949445, "grad_norm": 0.08982829004526138, "learning_rate": 0.01, "loss": 1.9966, "step": 33189 }, { "epoch": 3.410604192355117, "grad_norm": 0.04844099283218384, "learning_rate": 0.01, "loss": 2.0223, "step": 33192 }, { "epoch": 3.410912453760789, "grad_norm": 0.07782240957021713, "learning_rate": 0.01, "loss": 2.025, "step": 33195 }, { "epoch": 3.4112207151664613, "grad_norm": 0.07867666333913803, "learning_rate": 0.01, "loss": 2.0076, "step": 33198 }, { "epoch": 3.411528976572133, "grad_norm": 0.07254987210035324, "learning_rate": 0.01, "loss": 2.0129, "step": 33201 }, { "epoch": 3.4118372379778052, "grad_norm": 0.04480341821908951, "learning_rate": 0.01, "loss": 1.9914, "step": 33204 }, { "epoch": 3.412145499383477, "grad_norm": 0.03986749053001404, "learning_rate": 0.01, "loss": 1.9867, "step": 33207 }, { "epoch": 3.412453760789149, "grad_norm": 0.04472361132502556, "learning_rate": 0.01, "loss": 2.0184, "step": 33210 }, { "epoch": 3.4127620221948214, "grad_norm": 0.07955579459667206, "learning_rate": 0.01, "loss": 2.01, "step": 33213 }, { "epoch": 3.413070283600493, "grad_norm": 0.056707967072725296, "learning_rate": 0.01, "loss": 2.0105, "step": 33216 }, { "epoch": 3.4133785450061653, "grad_norm": 0.04070746898651123, "learning_rate": 0.01, "loss": 2.0226, "step": 33219 }, { "epoch": 3.413686806411837, "grad_norm": 0.03773896023631096, "learning_rate": 0.01, "loss": 2.015, "step": 33222 }, { "epoch": 3.4139950678175093, "grad_norm": 0.10554299503564835, "learning_rate": 0.01, "loss": 2.0331, "step": 33225 }, { "epoch": 3.4143033292231815, "grad_norm": 0.0555710643529892, "learning_rate": 0.01, "loss": 1.996, "step": 33228 }, { "epoch": 3.414611590628853, "grad_norm": 0.1139519140124321, "learning_rate": 0.01, "loss": 1.9821, "step": 33231 }, { "epoch": 3.4149198520345254, "grad_norm": 0.042904384434223175, "learning_rate": 0.01, "loss": 2.0057, "step": 33234 }, { "epoch": 3.415228113440197, "grad_norm": 0.10528502613306046, "learning_rate": 0.01, "loss": 1.9781, "step": 33237 }, { "epoch": 3.4155363748458694, "grad_norm": 0.03914659097790718, "learning_rate": 0.01, "loss": 2.0029, "step": 33240 }, { "epoch": 3.415844636251541, "grad_norm": 0.11122586578130722, "learning_rate": 0.01, "loss": 1.9968, "step": 33243 }, { "epoch": 3.4161528976572133, "grad_norm": 0.06572670489549637, "learning_rate": 0.01, "loss": 2.0077, "step": 33246 }, { "epoch": 3.4164611590628855, "grad_norm": 0.05022534728050232, "learning_rate": 0.01, "loss": 2.0224, "step": 33249 }, { "epoch": 3.4167694204685573, "grad_norm": 0.08149400353431702, "learning_rate": 0.01, "loss": 2.0326, "step": 33252 }, { "epoch": 3.4170776818742294, "grad_norm": 0.04350002855062485, "learning_rate": 0.01, "loss": 1.9947, "step": 33255 }, { "epoch": 3.417385943279901, "grad_norm": 0.0445462241768837, "learning_rate": 0.01, "loss": 2.0372, "step": 33258 }, { "epoch": 3.4176942046855734, "grad_norm": 0.04082934185862541, "learning_rate": 0.01, "loss": 2.0199, "step": 33261 }, { "epoch": 3.418002466091245, "grad_norm": 0.060355301946401596, "learning_rate": 0.01, "loss": 2.0204, "step": 33264 }, { "epoch": 3.4183107274969173, "grad_norm": 0.04439264163374901, "learning_rate": 0.01, "loss": 2.0216, "step": 33267 }, { "epoch": 3.4186189889025895, "grad_norm": 0.11760549992322922, "learning_rate": 0.01, "loss": 2.0284, "step": 33270 }, { "epoch": 3.4189272503082613, "grad_norm": 0.08547448366880417, "learning_rate": 0.01, "loss": 2.0307, "step": 33273 }, { "epoch": 3.4192355117139335, "grad_norm": 0.06072860211133957, "learning_rate": 0.01, "loss": 1.992, "step": 33276 }, { "epoch": 3.4195437731196057, "grad_norm": 0.03637344762682915, "learning_rate": 0.01, "loss": 2.02, "step": 33279 }, { "epoch": 3.4198520345252774, "grad_norm": 0.0440024733543396, "learning_rate": 0.01, "loss": 2.007, "step": 33282 }, { "epoch": 3.4201602959309496, "grad_norm": 0.040933944284915924, "learning_rate": 0.01, "loss": 2.0253, "step": 33285 }, { "epoch": 3.4204685573366214, "grad_norm": 0.10233576595783234, "learning_rate": 0.01, "loss": 2.0433, "step": 33288 }, { "epoch": 3.4207768187422936, "grad_norm": 0.10494884103536606, "learning_rate": 0.01, "loss": 2.0007, "step": 33291 }, { "epoch": 3.4210850801479653, "grad_norm": 0.04247460886836052, "learning_rate": 0.01, "loss": 2.0119, "step": 33294 }, { "epoch": 3.4213933415536375, "grad_norm": 0.07929468154907227, "learning_rate": 0.01, "loss": 2.0067, "step": 33297 }, { "epoch": 3.4217016029593097, "grad_norm": 0.04947086423635483, "learning_rate": 0.01, "loss": 2.0049, "step": 33300 }, { "epoch": 3.4220098643649814, "grad_norm": 0.05473649874329567, "learning_rate": 0.01, "loss": 1.9872, "step": 33303 }, { "epoch": 3.4223181257706536, "grad_norm": 0.06789970397949219, "learning_rate": 0.01, "loss": 2.0173, "step": 33306 }, { "epoch": 3.4226263871763254, "grad_norm": 0.044122181832790375, "learning_rate": 0.01, "loss": 1.9957, "step": 33309 }, { "epoch": 3.4229346485819976, "grad_norm": 0.0713338777422905, "learning_rate": 0.01, "loss": 1.9912, "step": 33312 }, { "epoch": 3.4232429099876693, "grad_norm": 0.09774953871965408, "learning_rate": 0.01, "loss": 1.999, "step": 33315 }, { "epoch": 3.4235511713933415, "grad_norm": 0.049434032291173935, "learning_rate": 0.01, "loss": 2.0194, "step": 33318 }, { "epoch": 3.4238594327990137, "grad_norm": 0.06290262192487717, "learning_rate": 0.01, "loss": 2.0014, "step": 33321 }, { "epoch": 3.4241676942046855, "grad_norm": 0.042732108384370804, "learning_rate": 0.01, "loss": 1.9909, "step": 33324 }, { "epoch": 3.4244759556103577, "grad_norm": 0.03461041674017906, "learning_rate": 0.01, "loss": 1.9881, "step": 33327 }, { "epoch": 3.4247842170160294, "grad_norm": 0.04503572732210159, "learning_rate": 0.01, "loss": 1.9981, "step": 33330 }, { "epoch": 3.4250924784217016, "grad_norm": 0.04774646461009979, "learning_rate": 0.01, "loss": 2.0216, "step": 33333 }, { "epoch": 3.4254007398273734, "grad_norm": 0.08576779067516327, "learning_rate": 0.01, "loss": 2.0124, "step": 33336 }, { "epoch": 3.4257090012330456, "grad_norm": 0.13729599118232727, "learning_rate": 0.01, "loss": 2.0126, "step": 33339 }, { "epoch": 3.4260172626387178, "grad_norm": 0.060716260224580765, "learning_rate": 0.01, "loss": 2.0153, "step": 33342 }, { "epoch": 3.4263255240443895, "grad_norm": 0.037777479737997055, "learning_rate": 0.01, "loss": 2.0181, "step": 33345 }, { "epoch": 3.4266337854500617, "grad_norm": 0.042688485234975815, "learning_rate": 0.01, "loss": 1.9792, "step": 33348 }, { "epoch": 3.426942046855734, "grad_norm": 0.07009312510490417, "learning_rate": 0.01, "loss": 2.0143, "step": 33351 }, { "epoch": 3.4272503082614056, "grad_norm": 0.10260313749313354, "learning_rate": 0.01, "loss": 1.9985, "step": 33354 }, { "epoch": 3.427558569667078, "grad_norm": 0.08466068655252457, "learning_rate": 0.01, "loss": 1.9981, "step": 33357 }, { "epoch": 3.4278668310727496, "grad_norm": 0.061912521719932556, "learning_rate": 0.01, "loss": 2.0144, "step": 33360 }, { "epoch": 3.428175092478422, "grad_norm": 0.0470789298415184, "learning_rate": 0.01, "loss": 1.9863, "step": 33363 }, { "epoch": 3.4284833538840935, "grad_norm": 0.0477573424577713, "learning_rate": 0.01, "loss": 2.0189, "step": 33366 }, { "epoch": 3.4287916152897657, "grad_norm": 0.03324504569172859, "learning_rate": 0.01, "loss": 2.0107, "step": 33369 }, { "epoch": 3.429099876695438, "grad_norm": 0.07741666585206985, "learning_rate": 0.01, "loss": 2.0103, "step": 33372 }, { "epoch": 3.4294081381011097, "grad_norm": 0.05770926922559738, "learning_rate": 0.01, "loss": 2.0398, "step": 33375 }, { "epoch": 3.429716399506782, "grad_norm": 0.08471731096506119, "learning_rate": 0.01, "loss": 2.0451, "step": 33378 }, { "epoch": 3.4300246609124536, "grad_norm": 0.04667286202311516, "learning_rate": 0.01, "loss": 2.0136, "step": 33381 }, { "epoch": 3.430332922318126, "grad_norm": 0.0683809220790863, "learning_rate": 0.01, "loss": 2.0032, "step": 33384 }, { "epoch": 3.4306411837237976, "grad_norm": 0.07834406197071075, "learning_rate": 0.01, "loss": 2.0172, "step": 33387 }, { "epoch": 3.4309494451294698, "grad_norm": 0.04956913739442825, "learning_rate": 0.01, "loss": 2.0226, "step": 33390 }, { "epoch": 3.431257706535142, "grad_norm": 0.05492135509848595, "learning_rate": 0.01, "loss": 2.0153, "step": 33393 }, { "epoch": 3.4315659679408137, "grad_norm": 0.05343586578965187, "learning_rate": 0.01, "loss": 1.9995, "step": 33396 }, { "epoch": 3.431874229346486, "grad_norm": 0.04083942621946335, "learning_rate": 0.01, "loss": 2.005, "step": 33399 }, { "epoch": 3.4321824907521576, "grad_norm": 0.06474661827087402, "learning_rate": 0.01, "loss": 2.0238, "step": 33402 }, { "epoch": 3.43249075215783, "grad_norm": 0.09690015017986298, "learning_rate": 0.01, "loss": 1.9986, "step": 33405 }, { "epoch": 3.432799013563502, "grad_norm": 0.17796829342842102, "learning_rate": 0.01, "loss": 2.0288, "step": 33408 }, { "epoch": 3.433107274969174, "grad_norm": 0.11173928529024124, "learning_rate": 0.01, "loss": 2.0249, "step": 33411 }, { "epoch": 3.433415536374846, "grad_norm": 0.045607730746269226, "learning_rate": 0.01, "loss": 1.9694, "step": 33414 }, { "epoch": 3.4337237977805177, "grad_norm": 0.03982311487197876, "learning_rate": 0.01, "loss": 1.9895, "step": 33417 }, { "epoch": 3.43403205918619, "grad_norm": 0.03420604392886162, "learning_rate": 0.01, "loss": 1.9957, "step": 33420 }, { "epoch": 3.434340320591862, "grad_norm": 0.03757992014288902, "learning_rate": 0.01, "loss": 2.026, "step": 33423 }, { "epoch": 3.434648581997534, "grad_norm": 0.05664653331041336, "learning_rate": 0.01, "loss": 1.9884, "step": 33426 }, { "epoch": 3.434956843403206, "grad_norm": 0.05265260115265846, "learning_rate": 0.01, "loss": 2.0409, "step": 33429 }, { "epoch": 3.435265104808878, "grad_norm": 0.0430876798927784, "learning_rate": 0.01, "loss": 2.0004, "step": 33432 }, { "epoch": 3.43557336621455, "grad_norm": 0.04210485517978668, "learning_rate": 0.01, "loss": 2.0004, "step": 33435 }, { "epoch": 3.4358816276202218, "grad_norm": 0.049002841114997864, "learning_rate": 0.01, "loss": 2.0129, "step": 33438 }, { "epoch": 3.436189889025894, "grad_norm": 0.0786898285150528, "learning_rate": 0.01, "loss": 1.9915, "step": 33441 }, { "epoch": 3.436498150431566, "grad_norm": 0.04218638688325882, "learning_rate": 0.01, "loss": 2.0079, "step": 33444 }, { "epoch": 3.436806411837238, "grad_norm": 0.13452041149139404, "learning_rate": 0.01, "loss": 2.0105, "step": 33447 }, { "epoch": 3.43711467324291, "grad_norm": 0.06728319823741913, "learning_rate": 0.01, "loss": 2.0053, "step": 33450 }, { "epoch": 3.437422934648582, "grad_norm": 0.09041707217693329, "learning_rate": 0.01, "loss": 2.0059, "step": 33453 }, { "epoch": 3.437731196054254, "grad_norm": 0.04817497730255127, "learning_rate": 0.01, "loss": 2.0024, "step": 33456 }, { "epoch": 3.438039457459926, "grad_norm": 0.05033170431852341, "learning_rate": 0.01, "loss": 2.016, "step": 33459 }, { "epoch": 3.438347718865598, "grad_norm": 0.12856252491474152, "learning_rate": 0.01, "loss": 2.0202, "step": 33462 }, { "epoch": 3.43865598027127, "grad_norm": 0.03690528869628906, "learning_rate": 0.01, "loss": 2.0131, "step": 33465 }, { "epoch": 3.438964241676942, "grad_norm": 0.09053459018468857, "learning_rate": 0.01, "loss": 2.0057, "step": 33468 }, { "epoch": 3.439272503082614, "grad_norm": 0.11929309368133545, "learning_rate": 0.01, "loss": 1.9995, "step": 33471 }, { "epoch": 3.4395807644882863, "grad_norm": 0.06987284123897552, "learning_rate": 0.01, "loss": 1.9871, "step": 33474 }, { "epoch": 3.439889025893958, "grad_norm": 0.06181707605719566, "learning_rate": 0.01, "loss": 2.0167, "step": 33477 }, { "epoch": 3.4401972872996303, "grad_norm": 0.045914020389318466, "learning_rate": 0.01, "loss": 2.0067, "step": 33480 }, { "epoch": 3.440505548705302, "grad_norm": 0.04277556762099266, "learning_rate": 0.01, "loss": 2.0084, "step": 33483 }, { "epoch": 3.440813810110974, "grad_norm": 0.045943450182676315, "learning_rate": 0.01, "loss": 2.0117, "step": 33486 }, { "epoch": 3.441122071516646, "grad_norm": 0.04440785571932793, "learning_rate": 0.01, "loss": 1.9753, "step": 33489 }, { "epoch": 3.441430332922318, "grad_norm": 0.12033234536647797, "learning_rate": 0.01, "loss": 2.0216, "step": 33492 }, { "epoch": 3.4417385943279903, "grad_norm": 0.06069677323102951, "learning_rate": 0.01, "loss": 2.0139, "step": 33495 }, { "epoch": 3.442046855733662, "grad_norm": 0.08571046590805054, "learning_rate": 0.01, "loss": 2.0121, "step": 33498 }, { "epoch": 3.4423551171393343, "grad_norm": 0.05251142010092735, "learning_rate": 0.01, "loss": 2.0234, "step": 33501 }, { "epoch": 3.442663378545006, "grad_norm": 0.09658701717853546, "learning_rate": 0.01, "loss": 2.023, "step": 33504 }, { "epoch": 3.4429716399506782, "grad_norm": 0.10625968873500824, "learning_rate": 0.01, "loss": 2.0158, "step": 33507 }, { "epoch": 3.44327990135635, "grad_norm": 0.061645977199077606, "learning_rate": 0.01, "loss": 1.987, "step": 33510 }, { "epoch": 3.443588162762022, "grad_norm": 0.06879527121782303, "learning_rate": 0.01, "loss": 1.9988, "step": 33513 }, { "epoch": 3.4438964241676944, "grad_norm": 0.07986783981323242, "learning_rate": 0.01, "loss": 2.0087, "step": 33516 }, { "epoch": 3.444204685573366, "grad_norm": 0.06323929876089096, "learning_rate": 0.01, "loss": 2.0047, "step": 33519 }, { "epoch": 3.4445129469790383, "grad_norm": 0.08186205476522446, "learning_rate": 0.01, "loss": 2.0312, "step": 33522 }, { "epoch": 3.44482120838471, "grad_norm": 0.1049259677529335, "learning_rate": 0.01, "loss": 1.9993, "step": 33525 }, { "epoch": 3.4451294697903823, "grad_norm": 0.12427592277526855, "learning_rate": 0.01, "loss": 1.9984, "step": 33528 }, { "epoch": 3.445437731196054, "grad_norm": 0.04911283776164055, "learning_rate": 0.01, "loss": 1.9979, "step": 33531 }, { "epoch": 3.445745992601726, "grad_norm": 0.07451221346855164, "learning_rate": 0.01, "loss": 1.9814, "step": 33534 }, { "epoch": 3.4460542540073984, "grad_norm": 0.04838255047798157, "learning_rate": 0.01, "loss": 2.0044, "step": 33537 }, { "epoch": 3.44636251541307, "grad_norm": 0.0435669869184494, "learning_rate": 0.01, "loss": 2.0134, "step": 33540 }, { "epoch": 3.4466707768187423, "grad_norm": 0.12010036408901215, "learning_rate": 0.01, "loss": 1.965, "step": 33543 }, { "epoch": 3.4469790382244145, "grad_norm": 0.04258548840880394, "learning_rate": 0.01, "loss": 1.9871, "step": 33546 }, { "epoch": 3.4472872996300863, "grad_norm": 0.04736476391553879, "learning_rate": 0.01, "loss": 1.983, "step": 33549 }, { "epoch": 3.4475955610357585, "grad_norm": 0.04423545300960541, "learning_rate": 0.01, "loss": 2.001, "step": 33552 }, { "epoch": 3.4479038224414302, "grad_norm": 0.07585839927196503, "learning_rate": 0.01, "loss": 2.0094, "step": 33555 }, { "epoch": 3.4482120838471024, "grad_norm": 0.03559425473213196, "learning_rate": 0.01, "loss": 1.9835, "step": 33558 }, { "epoch": 3.448520345252774, "grad_norm": 0.058567702770233154, "learning_rate": 0.01, "loss": 1.9996, "step": 33561 }, { "epoch": 3.4488286066584464, "grad_norm": 0.05344400927424431, "learning_rate": 0.01, "loss": 2.0128, "step": 33564 }, { "epoch": 3.4491368680641186, "grad_norm": 0.0584418885409832, "learning_rate": 0.01, "loss": 2.0334, "step": 33567 }, { "epoch": 3.4494451294697903, "grad_norm": 0.14322839677333832, "learning_rate": 0.01, "loss": 2.0362, "step": 33570 }, { "epoch": 3.4497533908754625, "grad_norm": 0.039136361330747604, "learning_rate": 0.01, "loss": 1.9792, "step": 33573 }, { "epoch": 3.4500616522811343, "grad_norm": 0.0871317982673645, "learning_rate": 0.01, "loss": 2.03, "step": 33576 }, { "epoch": 3.4503699136868065, "grad_norm": 0.07295375317335129, "learning_rate": 0.01, "loss": 1.9906, "step": 33579 }, { "epoch": 3.450678175092478, "grad_norm": 0.04469291865825653, "learning_rate": 0.01, "loss": 2.0245, "step": 33582 }, { "epoch": 3.4509864364981504, "grad_norm": 0.06063535064458847, "learning_rate": 0.01, "loss": 2.0083, "step": 33585 }, { "epoch": 3.4512946979038226, "grad_norm": 0.07924182713031769, "learning_rate": 0.01, "loss": 2.0115, "step": 33588 }, { "epoch": 3.4516029593094943, "grad_norm": 0.089077427983284, "learning_rate": 0.01, "loss": 2.017, "step": 33591 }, { "epoch": 3.4519112207151665, "grad_norm": 0.08197617530822754, "learning_rate": 0.01, "loss": 1.9987, "step": 33594 }, { "epoch": 3.4522194821208383, "grad_norm": 0.039551399648189545, "learning_rate": 0.01, "loss": 2.0211, "step": 33597 }, { "epoch": 3.4525277435265105, "grad_norm": 0.08920145779848099, "learning_rate": 0.01, "loss": 1.9952, "step": 33600 }, { "epoch": 3.4528360049321827, "grad_norm": 0.04301462695002556, "learning_rate": 0.01, "loss": 1.9898, "step": 33603 }, { "epoch": 3.4531442663378544, "grad_norm": 0.05201060697436333, "learning_rate": 0.01, "loss": 2.0022, "step": 33606 }, { "epoch": 3.4534525277435266, "grad_norm": 0.05899956077337265, "learning_rate": 0.01, "loss": 2.0318, "step": 33609 }, { "epoch": 3.4537607891491984, "grad_norm": 0.09653299301862717, "learning_rate": 0.01, "loss": 1.9955, "step": 33612 }, { "epoch": 3.4540690505548706, "grad_norm": 0.06416913866996765, "learning_rate": 0.01, "loss": 1.9858, "step": 33615 }, { "epoch": 3.4543773119605428, "grad_norm": 0.07932529598474503, "learning_rate": 0.01, "loss": 1.9941, "step": 33618 }, { "epoch": 3.4546855733662145, "grad_norm": 0.06251846253871918, "learning_rate": 0.01, "loss": 1.984, "step": 33621 }, { "epoch": 3.4549938347718867, "grad_norm": 0.07022767513990402, "learning_rate": 0.01, "loss": 2.0032, "step": 33624 }, { "epoch": 3.4553020961775585, "grad_norm": 0.08116226643323898, "learning_rate": 0.01, "loss": 2.0229, "step": 33627 }, { "epoch": 3.4556103575832307, "grad_norm": 0.07052188366651535, "learning_rate": 0.01, "loss": 1.986, "step": 33630 }, { "epoch": 3.4559186189889024, "grad_norm": 0.055427566170692444, "learning_rate": 0.01, "loss": 1.9792, "step": 33633 }, { "epoch": 3.4562268803945746, "grad_norm": 0.049462925642728806, "learning_rate": 0.01, "loss": 1.9937, "step": 33636 }, { "epoch": 3.456535141800247, "grad_norm": 0.05117397755384445, "learning_rate": 0.01, "loss": 2.0058, "step": 33639 }, { "epoch": 3.4568434032059185, "grad_norm": 0.14244894683361053, "learning_rate": 0.01, "loss": 1.9981, "step": 33642 }, { "epoch": 3.4571516646115907, "grad_norm": 0.040531981736421585, "learning_rate": 0.01, "loss": 2.0291, "step": 33645 }, { "epoch": 3.4574599260172625, "grad_norm": 0.0898265540599823, "learning_rate": 0.01, "loss": 2.0103, "step": 33648 }, { "epoch": 3.4577681874229347, "grad_norm": 0.06414288282394409, "learning_rate": 0.01, "loss": 1.9944, "step": 33651 }, { "epoch": 3.4580764488286064, "grad_norm": 0.06362918019294739, "learning_rate": 0.01, "loss": 2.031, "step": 33654 }, { "epoch": 3.4583847102342786, "grad_norm": 0.0881992056965828, "learning_rate": 0.01, "loss": 2.0363, "step": 33657 }, { "epoch": 3.458692971639951, "grad_norm": 0.03909778594970703, "learning_rate": 0.01, "loss": 1.9831, "step": 33660 }, { "epoch": 3.4590012330456226, "grad_norm": 0.06597696989774704, "learning_rate": 0.01, "loss": 2.0153, "step": 33663 }, { "epoch": 3.4593094944512948, "grad_norm": 0.05599299073219299, "learning_rate": 0.01, "loss": 2.0203, "step": 33666 }, { "epoch": 3.4596177558569665, "grad_norm": 0.0734795406460762, "learning_rate": 0.01, "loss": 2.0126, "step": 33669 }, { "epoch": 3.4599260172626387, "grad_norm": 0.07773378491401672, "learning_rate": 0.01, "loss": 1.9858, "step": 33672 }, { "epoch": 3.460234278668311, "grad_norm": 0.04273884743452072, "learning_rate": 0.01, "loss": 2.0054, "step": 33675 }, { "epoch": 3.4605425400739827, "grad_norm": 0.08914119005203247, "learning_rate": 0.01, "loss": 2.0455, "step": 33678 }, { "epoch": 3.460850801479655, "grad_norm": 0.059121765196323395, "learning_rate": 0.01, "loss": 1.9822, "step": 33681 }, { "epoch": 3.4611590628853266, "grad_norm": 0.0828641727566719, "learning_rate": 0.01, "loss": 1.9946, "step": 33684 }, { "epoch": 3.461467324290999, "grad_norm": 0.07057880610227585, "learning_rate": 0.01, "loss": 1.9918, "step": 33687 }, { "epoch": 3.461775585696671, "grad_norm": 0.0789676234126091, "learning_rate": 0.01, "loss": 2.005, "step": 33690 }, { "epoch": 3.4620838471023427, "grad_norm": 0.06654086709022522, "learning_rate": 0.01, "loss": 1.9751, "step": 33693 }, { "epoch": 3.462392108508015, "grad_norm": 0.08804110437631607, "learning_rate": 0.01, "loss": 1.9984, "step": 33696 }, { "epoch": 3.4627003699136867, "grad_norm": 0.05654985085129738, "learning_rate": 0.01, "loss": 1.9957, "step": 33699 }, { "epoch": 3.463008631319359, "grad_norm": 0.03474681079387665, "learning_rate": 0.01, "loss": 1.9757, "step": 33702 }, { "epoch": 3.4633168927250306, "grad_norm": 0.03495550900697708, "learning_rate": 0.01, "loss": 1.985, "step": 33705 }, { "epoch": 3.463625154130703, "grad_norm": 0.07207003980875015, "learning_rate": 0.01, "loss": 2.0038, "step": 33708 }, { "epoch": 3.463933415536375, "grad_norm": 0.10733482986688614, "learning_rate": 0.01, "loss": 1.998, "step": 33711 }, { "epoch": 3.4642416769420468, "grad_norm": 0.17830905318260193, "learning_rate": 0.01, "loss": 2.0263, "step": 33714 }, { "epoch": 3.464549938347719, "grad_norm": 0.10432233661413193, "learning_rate": 0.01, "loss": 2.0258, "step": 33717 }, { "epoch": 3.4648581997533907, "grad_norm": 0.08804329484701157, "learning_rate": 0.01, "loss": 2.0208, "step": 33720 }, { "epoch": 3.465166461159063, "grad_norm": 0.09405447542667389, "learning_rate": 0.01, "loss": 2.0228, "step": 33723 }, { "epoch": 3.4654747225647347, "grad_norm": 0.08153831958770752, "learning_rate": 0.01, "loss": 2.0129, "step": 33726 }, { "epoch": 3.465782983970407, "grad_norm": 0.04865524545311928, "learning_rate": 0.01, "loss": 2.0067, "step": 33729 }, { "epoch": 3.466091245376079, "grad_norm": 0.0705978274345398, "learning_rate": 0.01, "loss": 2.005, "step": 33732 }, { "epoch": 3.466399506781751, "grad_norm": 0.08283443003892899, "learning_rate": 0.01, "loss": 1.9943, "step": 33735 }, { "epoch": 3.466707768187423, "grad_norm": 0.059983205050230026, "learning_rate": 0.01, "loss": 1.9991, "step": 33738 }, { "epoch": 3.467016029593095, "grad_norm": 0.045960474759340286, "learning_rate": 0.01, "loss": 1.9823, "step": 33741 }, { "epoch": 3.467324290998767, "grad_norm": 0.03882883861660957, "learning_rate": 0.01, "loss": 2.0191, "step": 33744 }, { "epoch": 3.467632552404439, "grad_norm": 0.07004483044147491, "learning_rate": 0.01, "loss": 1.9851, "step": 33747 }, { "epoch": 3.467940813810111, "grad_norm": 0.047444459050893784, "learning_rate": 0.01, "loss": 2.0257, "step": 33750 }, { "epoch": 3.468249075215783, "grad_norm": 0.04262397438287735, "learning_rate": 0.01, "loss": 1.9981, "step": 33753 }, { "epoch": 3.468557336621455, "grad_norm": 0.05599913001060486, "learning_rate": 0.01, "loss": 2.005, "step": 33756 }, { "epoch": 3.468865598027127, "grad_norm": 0.03917532414197922, "learning_rate": 0.01, "loss": 2.012, "step": 33759 }, { "epoch": 3.469173859432799, "grad_norm": 0.14925096929073334, "learning_rate": 0.01, "loss": 1.9998, "step": 33762 }, { "epoch": 3.469482120838471, "grad_norm": 0.09570999443531036, "learning_rate": 0.01, "loss": 1.987, "step": 33765 }, { "epoch": 3.469790382244143, "grad_norm": 0.07883327454328537, "learning_rate": 0.01, "loss": 2.0051, "step": 33768 }, { "epoch": 3.470098643649815, "grad_norm": 0.06480975449085236, "learning_rate": 0.01, "loss": 2.0074, "step": 33771 }, { "epoch": 3.470406905055487, "grad_norm": 0.054496169090270996, "learning_rate": 0.01, "loss": 2.0306, "step": 33774 }, { "epoch": 3.470715166461159, "grad_norm": 0.04248659685254097, "learning_rate": 0.01, "loss": 2.0213, "step": 33777 }, { "epoch": 3.471023427866831, "grad_norm": 0.04117753356695175, "learning_rate": 0.01, "loss": 1.9845, "step": 33780 }, { "epoch": 3.4713316892725032, "grad_norm": 0.06876087188720703, "learning_rate": 0.01, "loss": 2.0142, "step": 33783 }, { "epoch": 3.471639950678175, "grad_norm": 0.08680430799722672, "learning_rate": 0.01, "loss": 2.0103, "step": 33786 }, { "epoch": 3.471948212083847, "grad_norm": 0.07227423787117004, "learning_rate": 0.01, "loss": 2.0229, "step": 33789 }, { "epoch": 3.472256473489519, "grad_norm": 0.04946593940258026, "learning_rate": 0.01, "loss": 2.0042, "step": 33792 }, { "epoch": 3.472564734895191, "grad_norm": 0.05215618014335632, "learning_rate": 0.01, "loss": 2.0202, "step": 33795 }, { "epoch": 3.4728729963008633, "grad_norm": 0.044869258999824524, "learning_rate": 0.01, "loss": 2.014, "step": 33798 }, { "epoch": 3.473181257706535, "grad_norm": 0.048952534794807434, "learning_rate": 0.01, "loss": 2.0097, "step": 33801 }, { "epoch": 3.4734895191122073, "grad_norm": 0.031407974660396576, "learning_rate": 0.01, "loss": 1.9993, "step": 33804 }, { "epoch": 3.473797780517879, "grad_norm": 0.033864185214042664, "learning_rate": 0.01, "loss": 2.0037, "step": 33807 }, { "epoch": 3.474106041923551, "grad_norm": 0.08060206472873688, "learning_rate": 0.01, "loss": 2.0053, "step": 33810 }, { "epoch": 3.4744143033292234, "grad_norm": 0.059409331530332565, "learning_rate": 0.01, "loss": 1.9876, "step": 33813 }, { "epoch": 3.474722564734895, "grad_norm": 0.04131161794066429, "learning_rate": 0.01, "loss": 1.9964, "step": 33816 }, { "epoch": 3.4750308261405674, "grad_norm": 0.04256697744131088, "learning_rate": 0.01, "loss": 2.0479, "step": 33819 }, { "epoch": 3.475339087546239, "grad_norm": 0.08916765451431274, "learning_rate": 0.01, "loss": 1.9885, "step": 33822 }, { "epoch": 3.4756473489519113, "grad_norm": 0.08474129438400269, "learning_rate": 0.01, "loss": 2.0012, "step": 33825 }, { "epoch": 3.475955610357583, "grad_norm": 0.05649823695421219, "learning_rate": 0.01, "loss": 1.9993, "step": 33828 }, { "epoch": 3.4762638717632552, "grad_norm": 0.05935325473546982, "learning_rate": 0.01, "loss": 2.0004, "step": 33831 }, { "epoch": 3.4765721331689274, "grad_norm": 0.04731081798672676, "learning_rate": 0.01, "loss": 1.9693, "step": 33834 }, { "epoch": 3.476880394574599, "grad_norm": 0.03790346160531044, "learning_rate": 0.01, "loss": 2.0295, "step": 33837 }, { "epoch": 3.4771886559802714, "grad_norm": 0.03726886212825775, "learning_rate": 0.01, "loss": 1.9737, "step": 33840 }, { "epoch": 3.477496917385943, "grad_norm": 0.07210247963666916, "learning_rate": 0.01, "loss": 1.9936, "step": 33843 }, { "epoch": 3.4778051787916153, "grad_norm": 0.07953071594238281, "learning_rate": 0.01, "loss": 2.0269, "step": 33846 }, { "epoch": 3.478113440197287, "grad_norm": 0.08004479855298996, "learning_rate": 0.01, "loss": 1.9947, "step": 33849 }, { "epoch": 3.4784217016029593, "grad_norm": 0.12617255747318268, "learning_rate": 0.01, "loss": 2.041, "step": 33852 }, { "epoch": 3.4787299630086315, "grad_norm": 0.08742248266935349, "learning_rate": 0.01, "loss": 1.9946, "step": 33855 }, { "epoch": 3.479038224414303, "grad_norm": 0.07119353115558624, "learning_rate": 0.01, "loss": 2.0041, "step": 33858 }, { "epoch": 3.4793464858199754, "grad_norm": 0.06368902325630188, "learning_rate": 0.01, "loss": 2.011, "step": 33861 }, { "epoch": 3.479654747225647, "grad_norm": 0.04121660441160202, "learning_rate": 0.01, "loss": 1.9902, "step": 33864 }, { "epoch": 3.4799630086313194, "grad_norm": 0.1237914115190506, "learning_rate": 0.01, "loss": 2.012, "step": 33867 }, { "epoch": 3.4802712700369915, "grad_norm": 0.02918284945189953, "learning_rate": 0.01, "loss": 2.0104, "step": 33870 }, { "epoch": 3.4805795314426633, "grad_norm": 0.0423772819340229, "learning_rate": 0.01, "loss": 2.0024, "step": 33873 }, { "epoch": 3.4808877928483355, "grad_norm": 0.05689895898103714, "learning_rate": 0.01, "loss": 2.0074, "step": 33876 }, { "epoch": 3.4811960542540072, "grad_norm": 0.057178955525159836, "learning_rate": 0.01, "loss": 1.9796, "step": 33879 }, { "epoch": 3.4815043156596794, "grad_norm": 0.04840772971510887, "learning_rate": 0.01, "loss": 1.9846, "step": 33882 }, { "epoch": 3.4818125770653516, "grad_norm": 0.0467451848089695, "learning_rate": 0.01, "loss": 2.0124, "step": 33885 }, { "epoch": 3.4821208384710234, "grad_norm": 0.08849336951971054, "learning_rate": 0.01, "loss": 2.0056, "step": 33888 }, { "epoch": 3.4824290998766956, "grad_norm": 0.040159258991479874, "learning_rate": 0.01, "loss": 2.0074, "step": 33891 }, { "epoch": 3.4827373612823673, "grad_norm": 0.03237557038664818, "learning_rate": 0.01, "loss": 1.9885, "step": 33894 }, { "epoch": 3.4830456226880395, "grad_norm": 0.11882402747869492, "learning_rate": 0.01, "loss": 1.988, "step": 33897 }, { "epoch": 3.4833538840937113, "grad_norm": 0.04206301271915436, "learning_rate": 0.01, "loss": 2.0138, "step": 33900 }, { "epoch": 3.4836621454993835, "grad_norm": 0.08875782787799835, "learning_rate": 0.01, "loss": 2.0009, "step": 33903 }, { "epoch": 3.4839704069050557, "grad_norm": 0.07771812379360199, "learning_rate": 0.01, "loss": 1.9787, "step": 33906 }, { "epoch": 3.4842786683107274, "grad_norm": 0.0839293822646141, "learning_rate": 0.01, "loss": 2.0191, "step": 33909 }, { "epoch": 3.4845869297163996, "grad_norm": 0.051187288016080856, "learning_rate": 0.01, "loss": 2.0112, "step": 33912 }, { "epoch": 3.4848951911220714, "grad_norm": 0.07497724145650864, "learning_rate": 0.01, "loss": 2.0153, "step": 33915 }, { "epoch": 3.4852034525277436, "grad_norm": 0.09341751039028168, "learning_rate": 0.01, "loss": 1.9815, "step": 33918 }, { "epoch": 3.4855117139334153, "grad_norm": 0.17380410432815552, "learning_rate": 0.01, "loss": 2.0015, "step": 33921 }, { "epoch": 3.4858199753390875, "grad_norm": 0.1407613456249237, "learning_rate": 0.01, "loss": 2.0066, "step": 33924 }, { "epoch": 3.4861282367447597, "grad_norm": 0.04020331799983978, "learning_rate": 0.01, "loss": 1.9915, "step": 33927 }, { "epoch": 3.4864364981504314, "grad_norm": 0.07008705288171768, "learning_rate": 0.01, "loss": 2.0045, "step": 33930 }, { "epoch": 3.4867447595561036, "grad_norm": 0.04310291260480881, "learning_rate": 0.01, "loss": 2.0103, "step": 33933 }, { "epoch": 3.487053020961776, "grad_norm": 0.03993843123316765, "learning_rate": 0.01, "loss": 2.0051, "step": 33936 }, { "epoch": 3.4873612823674476, "grad_norm": 0.04465002939105034, "learning_rate": 0.01, "loss": 2.0089, "step": 33939 }, { "epoch": 3.4876695437731198, "grad_norm": 0.03715788200497627, "learning_rate": 0.01, "loss": 1.9676, "step": 33942 }, { "epoch": 3.4879778051787915, "grad_norm": 0.06855741888284683, "learning_rate": 0.01, "loss": 2.0064, "step": 33945 }, { "epoch": 3.4882860665844637, "grad_norm": 0.05004393681883812, "learning_rate": 0.01, "loss": 2.0027, "step": 33948 }, { "epoch": 3.4885943279901355, "grad_norm": 0.11410415172576904, "learning_rate": 0.01, "loss": 1.9663, "step": 33951 }, { "epoch": 3.4889025893958077, "grad_norm": 0.09844734519720078, "learning_rate": 0.01, "loss": 1.9873, "step": 33954 }, { "epoch": 3.48921085080148, "grad_norm": 0.06708649545907974, "learning_rate": 0.01, "loss": 2.0026, "step": 33957 }, { "epoch": 3.4895191122071516, "grad_norm": 0.10117511451244354, "learning_rate": 0.01, "loss": 2.019, "step": 33960 }, { "epoch": 3.489827373612824, "grad_norm": 0.09758836030960083, "learning_rate": 0.01, "loss": 2.0272, "step": 33963 }, { "epoch": 3.4901356350184956, "grad_norm": 0.05189559981226921, "learning_rate": 0.01, "loss": 2.0107, "step": 33966 }, { "epoch": 3.4904438964241677, "grad_norm": 0.04805564507842064, "learning_rate": 0.01, "loss": 1.9984, "step": 33969 }, { "epoch": 3.4907521578298395, "grad_norm": 0.10716047137975693, "learning_rate": 0.01, "loss": 2.0004, "step": 33972 }, { "epoch": 3.4910604192355117, "grad_norm": 0.0761241689324379, "learning_rate": 0.01, "loss": 1.994, "step": 33975 }, { "epoch": 3.491368680641184, "grad_norm": 0.09611841291189194, "learning_rate": 0.01, "loss": 2.0152, "step": 33978 }, { "epoch": 3.4916769420468556, "grad_norm": 0.05685526877641678, "learning_rate": 0.01, "loss": 2.0061, "step": 33981 }, { "epoch": 3.491985203452528, "grad_norm": 0.044158194214105606, "learning_rate": 0.01, "loss": 1.9759, "step": 33984 }, { "epoch": 3.4922934648581996, "grad_norm": 0.0626639872789383, "learning_rate": 0.01, "loss": 2.0238, "step": 33987 }, { "epoch": 3.4926017262638718, "grad_norm": 0.0559966154396534, "learning_rate": 0.01, "loss": 2.0002, "step": 33990 }, { "epoch": 3.4929099876695435, "grad_norm": 0.05526448041200638, "learning_rate": 0.01, "loss": 2.0161, "step": 33993 }, { "epoch": 3.4932182490752157, "grad_norm": 0.07910171896219254, "learning_rate": 0.01, "loss": 2.0098, "step": 33996 }, { "epoch": 3.493526510480888, "grad_norm": 0.09646596759557724, "learning_rate": 0.01, "loss": 2.0161, "step": 33999 }, { "epoch": 3.4938347718865597, "grad_norm": 0.06635434925556183, "learning_rate": 0.01, "loss": 2.0271, "step": 34002 }, { "epoch": 3.494143033292232, "grad_norm": 0.04328935965895653, "learning_rate": 0.01, "loss": 2.0079, "step": 34005 }, { "epoch": 3.494451294697904, "grad_norm": 0.040060825645923615, "learning_rate": 0.01, "loss": 2.0076, "step": 34008 }, { "epoch": 3.494759556103576, "grad_norm": 0.0396769754588604, "learning_rate": 0.01, "loss": 2.0319, "step": 34011 }, { "epoch": 3.495067817509248, "grad_norm": 0.035870879888534546, "learning_rate": 0.01, "loss": 2.0013, "step": 34014 }, { "epoch": 3.4953760789149197, "grad_norm": 0.07099170982837677, "learning_rate": 0.01, "loss": 2.0024, "step": 34017 }, { "epoch": 3.495684340320592, "grad_norm": 0.06994140148162842, "learning_rate": 0.01, "loss": 2.009, "step": 34020 }, { "epoch": 3.4959926017262637, "grad_norm": 0.03673701733350754, "learning_rate": 0.01, "loss": 2.0091, "step": 34023 }, { "epoch": 3.496300863131936, "grad_norm": 0.036696165800094604, "learning_rate": 0.01, "loss": 2.0196, "step": 34026 }, { "epoch": 3.496609124537608, "grad_norm": 0.1089354008436203, "learning_rate": 0.01, "loss": 2.0083, "step": 34029 }, { "epoch": 3.49691738594328, "grad_norm": 0.04758044332265854, "learning_rate": 0.01, "loss": 1.9975, "step": 34032 }, { "epoch": 3.497225647348952, "grad_norm": 0.05122329294681549, "learning_rate": 0.01, "loss": 1.9944, "step": 34035 }, { "epoch": 3.4975339087546238, "grad_norm": 0.04168769717216492, "learning_rate": 0.01, "loss": 2.013, "step": 34038 }, { "epoch": 3.497842170160296, "grad_norm": 0.03426910564303398, "learning_rate": 0.01, "loss": 2.0217, "step": 34041 }, { "epoch": 3.4981504315659677, "grad_norm": 0.036272600293159485, "learning_rate": 0.01, "loss": 2.0134, "step": 34044 }, { "epoch": 3.49845869297164, "grad_norm": 0.11142997443675995, "learning_rate": 0.01, "loss": 2.0006, "step": 34047 }, { "epoch": 3.498766954377312, "grad_norm": 0.08410037308931351, "learning_rate": 0.01, "loss": 2.0274, "step": 34050 }, { "epoch": 3.499075215782984, "grad_norm": 0.10037896782159805, "learning_rate": 0.01, "loss": 2.0187, "step": 34053 }, { "epoch": 3.499383477188656, "grad_norm": 0.05194736644625664, "learning_rate": 0.01, "loss": 1.9862, "step": 34056 }, { "epoch": 3.499691738594328, "grad_norm": 0.037128567695617676, "learning_rate": 0.01, "loss": 1.9806, "step": 34059 }, { "epoch": 3.5, "grad_norm": 0.059042248874902725, "learning_rate": 0.01, "loss": 2.0134, "step": 34062 }, { "epoch": 3.5003082614056718, "grad_norm": 0.06282515078783035, "learning_rate": 0.01, "loss": 1.9964, "step": 34065 }, { "epoch": 3.500616522811344, "grad_norm": 0.04248201847076416, "learning_rate": 0.01, "loss": 2.0045, "step": 34068 }, { "epoch": 3.500924784217016, "grad_norm": 0.047459498047828674, "learning_rate": 0.01, "loss": 2.0171, "step": 34071 }, { "epoch": 3.501233045622688, "grad_norm": 0.04447497799992561, "learning_rate": 0.01, "loss": 2.0263, "step": 34074 }, { "epoch": 3.50154130702836, "grad_norm": 0.03569423034787178, "learning_rate": 0.01, "loss": 1.9867, "step": 34077 }, { "epoch": 3.5018495684340323, "grad_norm": 0.055864010006189346, "learning_rate": 0.01, "loss": 2.0025, "step": 34080 }, { "epoch": 3.502157829839704, "grad_norm": 0.055652111768722534, "learning_rate": 0.01, "loss": 1.9972, "step": 34083 }, { "epoch": 3.5024660912453762, "grad_norm": 0.1195710226893425, "learning_rate": 0.01, "loss": 1.9942, "step": 34086 }, { "epoch": 3.502774352651048, "grad_norm": 0.04436422884464264, "learning_rate": 0.01, "loss": 2.0008, "step": 34089 }, { "epoch": 3.50308261405672, "grad_norm": 0.047165922820568085, "learning_rate": 0.01, "loss": 2.0211, "step": 34092 }, { "epoch": 3.503390875462392, "grad_norm": 0.07143979519605637, "learning_rate": 0.01, "loss": 2.0108, "step": 34095 }, { "epoch": 3.503699136868064, "grad_norm": 0.04734091833233833, "learning_rate": 0.01, "loss": 2.0183, "step": 34098 }, { "epoch": 3.5040073982737363, "grad_norm": 0.05058762803673744, "learning_rate": 0.01, "loss": 1.9887, "step": 34101 }, { "epoch": 3.504315659679408, "grad_norm": 0.03917768597602844, "learning_rate": 0.01, "loss": 2.0052, "step": 34104 }, { "epoch": 3.5046239210850803, "grad_norm": 0.06384671479463577, "learning_rate": 0.01, "loss": 2.0188, "step": 34107 }, { "epoch": 3.504932182490752, "grad_norm": 0.046532079577445984, "learning_rate": 0.01, "loss": 1.9841, "step": 34110 }, { "epoch": 3.505240443896424, "grad_norm": 0.033960457891225815, "learning_rate": 0.01, "loss": 2.0045, "step": 34113 }, { "epoch": 3.505548705302096, "grad_norm": 0.061024975031614304, "learning_rate": 0.01, "loss": 2.0121, "step": 34116 }, { "epoch": 3.505856966707768, "grad_norm": 0.09465770423412323, "learning_rate": 0.01, "loss": 2.0043, "step": 34119 }, { "epoch": 3.5061652281134403, "grad_norm": 0.08510823547840118, "learning_rate": 0.01, "loss": 2.0116, "step": 34122 }, { "epoch": 3.506473489519112, "grad_norm": 0.12201209366321564, "learning_rate": 0.01, "loss": 2.0137, "step": 34125 }, { "epoch": 3.5067817509247843, "grad_norm": 0.08503463119268417, "learning_rate": 0.01, "loss": 2.0055, "step": 34128 }, { "epoch": 3.5070900123304565, "grad_norm": 0.07231762260198593, "learning_rate": 0.01, "loss": 1.9907, "step": 34131 }, { "epoch": 3.5073982737361282, "grad_norm": 0.042714521288871765, "learning_rate": 0.01, "loss": 2.0132, "step": 34134 }, { "epoch": 3.5077065351418, "grad_norm": 0.06119026616215706, "learning_rate": 0.01, "loss": 2.024, "step": 34137 }, { "epoch": 3.508014796547472, "grad_norm": 0.047117751091718674, "learning_rate": 0.01, "loss": 1.9938, "step": 34140 }, { "epoch": 3.5083230579531444, "grad_norm": 0.06318202614784241, "learning_rate": 0.01, "loss": 1.9826, "step": 34143 }, { "epoch": 3.508631319358816, "grad_norm": 0.05050423741340637, "learning_rate": 0.01, "loss": 1.9882, "step": 34146 }, { "epoch": 3.5089395807644883, "grad_norm": 0.06105552241206169, "learning_rate": 0.01, "loss": 1.9882, "step": 34149 }, { "epoch": 3.5092478421701605, "grad_norm": 0.05940508469939232, "learning_rate": 0.01, "loss": 2.0127, "step": 34152 }, { "epoch": 3.5095561035758323, "grad_norm": 0.05157890543341637, "learning_rate": 0.01, "loss": 1.9999, "step": 34155 }, { "epoch": 3.5098643649815044, "grad_norm": 0.06597436964511871, "learning_rate": 0.01, "loss": 2.0054, "step": 34158 }, { "epoch": 3.510172626387176, "grad_norm": 0.10424167662858963, "learning_rate": 0.01, "loss": 1.9899, "step": 34161 }, { "epoch": 3.5104808877928484, "grad_norm": 0.09572573006153107, "learning_rate": 0.01, "loss": 2.0132, "step": 34164 }, { "epoch": 3.51078914919852, "grad_norm": 0.07485774159431458, "learning_rate": 0.01, "loss": 2.0063, "step": 34167 }, { "epoch": 3.5110974106041923, "grad_norm": 0.12162437289953232, "learning_rate": 0.01, "loss": 2.0342, "step": 34170 }, { "epoch": 3.5114056720098645, "grad_norm": 0.09338829666376114, "learning_rate": 0.01, "loss": 2.0366, "step": 34173 }, { "epoch": 3.5117139334155363, "grad_norm": 0.06095868721604347, "learning_rate": 0.01, "loss": 2.0202, "step": 34176 }, { "epoch": 3.5120221948212085, "grad_norm": 0.057355985045433044, "learning_rate": 0.01, "loss": 2.0216, "step": 34179 }, { "epoch": 3.5123304562268807, "grad_norm": 0.10891444981098175, "learning_rate": 0.01, "loss": 2.0339, "step": 34182 }, { "epoch": 3.5126387176325524, "grad_norm": 0.10567829012870789, "learning_rate": 0.01, "loss": 2.0142, "step": 34185 }, { "epoch": 3.512946979038224, "grad_norm": 0.05244187265634537, "learning_rate": 0.01, "loss": 1.9864, "step": 34188 }, { "epoch": 3.5132552404438964, "grad_norm": 0.046076592057943344, "learning_rate": 0.01, "loss": 2.0013, "step": 34191 }, { "epoch": 3.5135635018495686, "grad_norm": 0.04405316337943077, "learning_rate": 0.01, "loss": 1.9941, "step": 34194 }, { "epoch": 3.5138717632552403, "grad_norm": 0.03997344523668289, "learning_rate": 0.01, "loss": 1.9915, "step": 34197 }, { "epoch": 3.5141800246609125, "grad_norm": 0.03909468650817871, "learning_rate": 0.01, "loss": 1.9767, "step": 34200 }, { "epoch": 3.5144882860665847, "grad_norm": 0.11657961457967758, "learning_rate": 0.01, "loss": 1.9941, "step": 34203 }, { "epoch": 3.5147965474722564, "grad_norm": 0.11744063347578049, "learning_rate": 0.01, "loss": 2.0029, "step": 34206 }, { "epoch": 3.5151048088779286, "grad_norm": 0.039932142943143845, "learning_rate": 0.01, "loss": 2.0168, "step": 34209 }, { "epoch": 3.5154130702836004, "grad_norm": 0.03299909457564354, "learning_rate": 0.01, "loss": 2.022, "step": 34212 }, { "epoch": 3.5157213316892726, "grad_norm": 0.1055513322353363, "learning_rate": 0.01, "loss": 2.0165, "step": 34215 }, { "epoch": 3.5160295930949443, "grad_norm": 0.05073374882340431, "learning_rate": 0.01, "loss": 1.9951, "step": 34218 }, { "epoch": 3.5163378545006165, "grad_norm": 0.05423841252923012, "learning_rate": 0.01, "loss": 2.0115, "step": 34221 }, { "epoch": 3.5166461159062887, "grad_norm": 0.05695211887359619, "learning_rate": 0.01, "loss": 2.011, "step": 34224 }, { "epoch": 3.5169543773119605, "grad_norm": 0.04092913866043091, "learning_rate": 0.01, "loss": 2.0303, "step": 34227 }, { "epoch": 3.5172626387176327, "grad_norm": 0.05083661898970604, "learning_rate": 0.01, "loss": 2.0081, "step": 34230 }, { "epoch": 3.5175709001233044, "grad_norm": 0.04499472677707672, "learning_rate": 0.01, "loss": 2.0036, "step": 34233 }, { "epoch": 3.5178791615289766, "grad_norm": 0.09770061075687408, "learning_rate": 0.01, "loss": 2.0209, "step": 34236 }, { "epoch": 3.5181874229346484, "grad_norm": 0.09951679408550262, "learning_rate": 0.01, "loss": 1.9993, "step": 34239 }, { "epoch": 3.5184956843403206, "grad_norm": 0.11089123040437698, "learning_rate": 0.01, "loss": 2.0088, "step": 34242 }, { "epoch": 3.5188039457459928, "grad_norm": 0.05511726066470146, "learning_rate": 0.01, "loss": 2.0129, "step": 34245 }, { "epoch": 3.5191122071516645, "grad_norm": 0.035139378160238266, "learning_rate": 0.01, "loss": 2.016, "step": 34248 }, { "epoch": 3.5194204685573367, "grad_norm": 0.05882834270596504, "learning_rate": 0.01, "loss": 2.0061, "step": 34251 }, { "epoch": 3.519728729963009, "grad_norm": 0.06780868023633957, "learning_rate": 0.01, "loss": 1.977, "step": 34254 }, { "epoch": 3.5200369913686806, "grad_norm": 0.053879499435424805, "learning_rate": 0.01, "loss": 2.0055, "step": 34257 }, { "epoch": 3.5203452527743524, "grad_norm": 0.045473113656044006, "learning_rate": 0.01, "loss": 2.0108, "step": 34260 }, { "epoch": 3.5206535141800246, "grad_norm": 0.050460174679756165, "learning_rate": 0.01, "loss": 2.0327, "step": 34263 }, { "epoch": 3.520961775585697, "grad_norm": 0.040959011763334274, "learning_rate": 0.01, "loss": 1.9772, "step": 34266 }, { "epoch": 3.5212700369913685, "grad_norm": 0.048418451100587845, "learning_rate": 0.01, "loss": 2.0182, "step": 34269 }, { "epoch": 3.5215782983970407, "grad_norm": 0.06287720799446106, "learning_rate": 0.01, "loss": 2.0014, "step": 34272 }, { "epoch": 3.521886559802713, "grad_norm": 0.04529783874750137, "learning_rate": 0.01, "loss": 2.0011, "step": 34275 }, { "epoch": 3.5221948212083847, "grad_norm": 0.03729906305670738, "learning_rate": 0.01, "loss": 2.0183, "step": 34278 }, { "epoch": 3.522503082614057, "grad_norm": 0.1012713834643364, "learning_rate": 0.01, "loss": 1.9841, "step": 34281 }, { "epoch": 3.5228113440197286, "grad_norm": 0.10641775280237198, "learning_rate": 0.01, "loss": 2.0122, "step": 34284 }, { "epoch": 3.523119605425401, "grad_norm": 0.09289912134408951, "learning_rate": 0.01, "loss": 2.016, "step": 34287 }, { "epoch": 3.5234278668310726, "grad_norm": 0.13354292511940002, "learning_rate": 0.01, "loss": 2.0119, "step": 34290 }, { "epoch": 3.5237361282367448, "grad_norm": 0.08027501404285431, "learning_rate": 0.01, "loss": 2.0046, "step": 34293 }, { "epoch": 3.524044389642417, "grad_norm": 0.047286976128816605, "learning_rate": 0.01, "loss": 1.9796, "step": 34296 }, { "epoch": 3.5243526510480887, "grad_norm": 0.042211420834064484, "learning_rate": 0.01, "loss": 2.0426, "step": 34299 }, { "epoch": 3.524660912453761, "grad_norm": 0.11374162137508392, "learning_rate": 0.01, "loss": 1.9954, "step": 34302 }, { "epoch": 3.5249691738594326, "grad_norm": 0.05604710429906845, "learning_rate": 0.01, "loss": 2.0039, "step": 34305 }, { "epoch": 3.525277435265105, "grad_norm": 0.09744080156087875, "learning_rate": 0.01, "loss": 1.9944, "step": 34308 }, { "epoch": 3.5255856966707766, "grad_norm": 0.08903683722019196, "learning_rate": 0.01, "loss": 1.9931, "step": 34311 }, { "epoch": 3.525893958076449, "grad_norm": 0.048648543655872345, "learning_rate": 0.01, "loss": 2.0028, "step": 34314 }, { "epoch": 3.526202219482121, "grad_norm": 0.0710827112197876, "learning_rate": 0.01, "loss": 2.0168, "step": 34317 }, { "epoch": 3.5265104808877927, "grad_norm": 0.04736728593707085, "learning_rate": 0.01, "loss": 1.9812, "step": 34320 }, { "epoch": 3.526818742293465, "grad_norm": 0.05635381117463112, "learning_rate": 0.01, "loss": 2.0202, "step": 34323 }, { "epoch": 3.527127003699137, "grad_norm": 0.06686391681432724, "learning_rate": 0.01, "loss": 2.0196, "step": 34326 }, { "epoch": 3.527435265104809, "grad_norm": 0.03842944651842117, "learning_rate": 0.01, "loss": 1.9974, "step": 34329 }, { "epoch": 3.5277435265104806, "grad_norm": 0.042534928768873215, "learning_rate": 0.01, "loss": 2.0155, "step": 34332 }, { "epoch": 3.528051787916153, "grad_norm": 0.06037287786602974, "learning_rate": 0.01, "loss": 2.0192, "step": 34335 }, { "epoch": 3.528360049321825, "grad_norm": 0.06533370167016983, "learning_rate": 0.01, "loss": 1.9873, "step": 34338 }, { "epoch": 3.5286683107274968, "grad_norm": 0.0877017006278038, "learning_rate": 0.01, "loss": 2.0081, "step": 34341 }, { "epoch": 3.528976572133169, "grad_norm": 0.09864972531795502, "learning_rate": 0.01, "loss": 1.9866, "step": 34344 }, { "epoch": 3.529284833538841, "grad_norm": 0.049320898950099945, "learning_rate": 0.01, "loss": 1.9994, "step": 34347 }, { "epoch": 3.529593094944513, "grad_norm": 0.12996013462543488, "learning_rate": 0.01, "loss": 2.0136, "step": 34350 }, { "epoch": 3.529901356350185, "grad_norm": 0.06548713147640228, "learning_rate": 0.01, "loss": 2.0221, "step": 34353 }, { "epoch": 3.530209617755857, "grad_norm": 0.045482341200113297, "learning_rate": 0.01, "loss": 2.0062, "step": 34356 }, { "epoch": 3.530517879161529, "grad_norm": 0.04433637857437134, "learning_rate": 0.01, "loss": 2.0062, "step": 34359 }, { "epoch": 3.530826140567201, "grad_norm": 0.04450944438576698, "learning_rate": 0.01, "loss": 1.983, "step": 34362 }, { "epoch": 3.531134401972873, "grad_norm": 0.07188856601715088, "learning_rate": 0.01, "loss": 2.0426, "step": 34365 }, { "epoch": 3.531442663378545, "grad_norm": 0.049853190779685974, "learning_rate": 0.01, "loss": 1.9959, "step": 34368 }, { "epoch": 3.531750924784217, "grad_norm": 0.08662360161542892, "learning_rate": 0.01, "loss": 1.9886, "step": 34371 }, { "epoch": 3.532059186189889, "grad_norm": 0.07737040519714355, "learning_rate": 0.01, "loss": 2.0064, "step": 34374 }, { "epoch": 3.532367447595561, "grad_norm": 0.0868213102221489, "learning_rate": 0.01, "loss": 1.9836, "step": 34377 }, { "epoch": 3.532675709001233, "grad_norm": 0.06288056075572968, "learning_rate": 0.01, "loss": 2.0142, "step": 34380 }, { "epoch": 3.532983970406905, "grad_norm": 0.05537475645542145, "learning_rate": 0.01, "loss": 1.9937, "step": 34383 }, { "epoch": 3.533292231812577, "grad_norm": 0.13551141321659088, "learning_rate": 0.01, "loss": 2.0162, "step": 34386 }, { "epoch": 3.533600493218249, "grad_norm": 0.040236156433820724, "learning_rate": 0.01, "loss": 2.0006, "step": 34389 }, { "epoch": 3.533908754623921, "grad_norm": 0.06904727220535278, "learning_rate": 0.01, "loss": 1.9882, "step": 34392 }, { "epoch": 3.534217016029593, "grad_norm": 0.06675262004137039, "learning_rate": 0.01, "loss": 2.0018, "step": 34395 }, { "epoch": 3.5345252774352653, "grad_norm": 0.05011274665594101, "learning_rate": 0.01, "loss": 2.0267, "step": 34398 }, { "epoch": 3.534833538840937, "grad_norm": 0.04494976997375488, "learning_rate": 0.01, "loss": 2.0243, "step": 34401 }, { "epoch": 3.5351418002466093, "grad_norm": 0.04114719480276108, "learning_rate": 0.01, "loss": 2.0161, "step": 34404 }, { "epoch": 3.535450061652281, "grad_norm": 0.03544189780950546, "learning_rate": 0.01, "loss": 1.9987, "step": 34407 }, { "epoch": 3.5357583230579532, "grad_norm": 0.10223556309938431, "learning_rate": 0.01, "loss": 2.0237, "step": 34410 }, { "epoch": 3.536066584463625, "grad_norm": 0.03573578596115112, "learning_rate": 0.01, "loss": 2.0017, "step": 34413 }, { "epoch": 3.536374845869297, "grad_norm": 0.08199316263198853, "learning_rate": 0.01, "loss": 2.007, "step": 34416 }, { "epoch": 3.5366831072749694, "grad_norm": 0.04587673768401146, "learning_rate": 0.01, "loss": 2.01, "step": 34419 }, { "epoch": 3.536991368680641, "grad_norm": 0.061022888869047165, "learning_rate": 0.01, "loss": 1.9894, "step": 34422 }, { "epoch": 3.5372996300863133, "grad_norm": 0.09616536647081375, "learning_rate": 0.01, "loss": 2.0081, "step": 34425 }, { "epoch": 3.537607891491985, "grad_norm": 0.10521430522203445, "learning_rate": 0.01, "loss": 2.0187, "step": 34428 }, { "epoch": 3.5379161528976573, "grad_norm": 0.0551154688000679, "learning_rate": 0.01, "loss": 2.0034, "step": 34431 }, { "epoch": 3.538224414303329, "grad_norm": 0.0405128113925457, "learning_rate": 0.01, "loss": 2.0152, "step": 34434 }, { "epoch": 3.538532675709001, "grad_norm": 0.035197604447603226, "learning_rate": 0.01, "loss": 2.0158, "step": 34437 }, { "epoch": 3.5388409371146734, "grad_norm": 0.026394899934530258, "learning_rate": 0.01, "loss": 1.9898, "step": 34440 }, { "epoch": 3.539149198520345, "grad_norm": 0.09831299632787704, "learning_rate": 0.01, "loss": 2.0205, "step": 34443 }, { "epoch": 3.5394574599260173, "grad_norm": 0.08986491709947586, "learning_rate": 0.01, "loss": 2.023, "step": 34446 }, { "epoch": 3.5397657213316895, "grad_norm": 0.07974385470151901, "learning_rate": 0.01, "loss": 1.9975, "step": 34449 }, { "epoch": 3.5400739827373613, "grad_norm": 0.11314170062541962, "learning_rate": 0.01, "loss": 2.0045, "step": 34452 }, { "epoch": 3.540382244143033, "grad_norm": 0.05535215139389038, "learning_rate": 0.01, "loss": 2.0142, "step": 34455 }, { "epoch": 3.5406905055487052, "grad_norm": 0.0592242069542408, "learning_rate": 0.01, "loss": 2.0243, "step": 34458 }, { "epoch": 3.5409987669543774, "grad_norm": 0.12061761319637299, "learning_rate": 0.01, "loss": 2.0125, "step": 34461 }, { "epoch": 3.541307028360049, "grad_norm": 0.11443237960338593, "learning_rate": 0.01, "loss": 2.004, "step": 34464 }, { "epoch": 3.5416152897657214, "grad_norm": 0.050069622695446014, "learning_rate": 0.01, "loss": 2.013, "step": 34467 }, { "epoch": 3.5419235511713936, "grad_norm": 0.0419803261756897, "learning_rate": 0.01, "loss": 2.0141, "step": 34470 }, { "epoch": 3.5422318125770653, "grad_norm": 0.03493333235383034, "learning_rate": 0.01, "loss": 2.0106, "step": 34473 }, { "epoch": 3.5425400739827375, "grad_norm": 0.036889005452394485, "learning_rate": 0.01, "loss": 2.0085, "step": 34476 }, { "epoch": 3.5428483353884093, "grad_norm": 0.05217687413096428, "learning_rate": 0.01, "loss": 1.9987, "step": 34479 }, { "epoch": 3.5431565967940815, "grad_norm": 0.050754569470882416, "learning_rate": 0.01, "loss": 2.0333, "step": 34482 }, { "epoch": 3.543464858199753, "grad_norm": 0.04706338793039322, "learning_rate": 0.01, "loss": 1.9973, "step": 34485 }, { "epoch": 3.5437731196054254, "grad_norm": 0.07039839029312134, "learning_rate": 0.01, "loss": 2.0104, "step": 34488 }, { "epoch": 3.5440813810110976, "grad_norm": 0.07069671154022217, "learning_rate": 0.01, "loss": 1.978, "step": 34491 }, { "epoch": 3.5443896424167693, "grad_norm": 0.09058693796396255, "learning_rate": 0.01, "loss": 2.0212, "step": 34494 }, { "epoch": 3.5446979038224415, "grad_norm": 0.037666015326976776, "learning_rate": 0.01, "loss": 2.0312, "step": 34497 }, { "epoch": 3.5450061652281133, "grad_norm": 0.04604499414563179, "learning_rate": 0.01, "loss": 2.041, "step": 34500 }, { "epoch": 3.5453144266337855, "grad_norm": 0.03853873908519745, "learning_rate": 0.01, "loss": 2.0138, "step": 34503 }, { "epoch": 3.5456226880394572, "grad_norm": 0.07123208791017532, "learning_rate": 0.01, "loss": 2.0107, "step": 34506 }, { "epoch": 3.5459309494451294, "grad_norm": 0.0936029776930809, "learning_rate": 0.01, "loss": 2.0001, "step": 34509 }, { "epoch": 3.5462392108508016, "grad_norm": 0.07412207871675491, "learning_rate": 0.01, "loss": 2.0034, "step": 34512 }, { "epoch": 3.5465474722564734, "grad_norm": 0.07681329548358917, "learning_rate": 0.01, "loss": 1.9964, "step": 34515 }, { "epoch": 3.5468557336621456, "grad_norm": 0.10403033345937729, "learning_rate": 0.01, "loss": 2.0077, "step": 34518 }, { "epoch": 3.5471639950678178, "grad_norm": 0.14240513741970062, "learning_rate": 0.01, "loss": 1.9695, "step": 34521 }, { "epoch": 3.5474722564734895, "grad_norm": 0.14203394949436188, "learning_rate": 0.01, "loss": 2.0187, "step": 34524 }, { "epoch": 3.5477805178791613, "grad_norm": 0.09036475419998169, "learning_rate": 0.01, "loss": 2.0054, "step": 34527 }, { "epoch": 3.5480887792848335, "grad_norm": 0.037448760122060776, "learning_rate": 0.01, "loss": 1.9978, "step": 34530 }, { "epoch": 3.5483970406905057, "grad_norm": 0.05539664253592491, "learning_rate": 0.01, "loss": 2.0015, "step": 34533 }, { "epoch": 3.5487053020961774, "grad_norm": 0.05857717618346214, "learning_rate": 0.01, "loss": 1.9974, "step": 34536 }, { "epoch": 3.5490135635018496, "grad_norm": 0.06406868249177933, "learning_rate": 0.01, "loss": 2.009, "step": 34539 }, { "epoch": 3.549321824907522, "grad_norm": 0.04234686121344566, "learning_rate": 0.01, "loss": 2.0273, "step": 34542 }, { "epoch": 3.5496300863131935, "grad_norm": 0.038916390389204025, "learning_rate": 0.01, "loss": 2.0059, "step": 34545 }, { "epoch": 3.5499383477188657, "grad_norm": 0.039077602326869965, "learning_rate": 0.01, "loss": 2.0196, "step": 34548 }, { "epoch": 3.5502466091245375, "grad_norm": 0.03960174322128296, "learning_rate": 0.01, "loss": 2.0167, "step": 34551 }, { "epoch": 3.5505548705302097, "grad_norm": 0.12132997810840607, "learning_rate": 0.01, "loss": 1.9902, "step": 34554 }, { "epoch": 3.5508631319358814, "grad_norm": 0.09045036137104034, "learning_rate": 0.01, "loss": 2.0178, "step": 34557 }, { "epoch": 3.5511713933415536, "grad_norm": 0.07062508165836334, "learning_rate": 0.01, "loss": 1.9798, "step": 34560 }, { "epoch": 3.551479654747226, "grad_norm": 0.08182030916213989, "learning_rate": 0.01, "loss": 1.9715, "step": 34563 }, { "epoch": 3.5517879161528976, "grad_norm": 0.05917488783597946, "learning_rate": 0.01, "loss": 2.0149, "step": 34566 }, { "epoch": 3.5520961775585698, "grad_norm": 0.08519969880580902, "learning_rate": 0.01, "loss": 2.009, "step": 34569 }, { "epoch": 3.5524044389642415, "grad_norm": 0.04442654922604561, "learning_rate": 0.01, "loss": 2.0218, "step": 34572 }, { "epoch": 3.5527127003699137, "grad_norm": 0.08265768736600876, "learning_rate": 0.01, "loss": 2.0006, "step": 34575 }, { "epoch": 3.5530209617755855, "grad_norm": 0.0891944020986557, "learning_rate": 0.01, "loss": 1.991, "step": 34578 }, { "epoch": 3.5533292231812577, "grad_norm": 0.0688168928027153, "learning_rate": 0.01, "loss": 1.995, "step": 34581 }, { "epoch": 3.55363748458693, "grad_norm": 0.0936504453420639, "learning_rate": 0.01, "loss": 1.9835, "step": 34584 }, { "epoch": 3.5539457459926016, "grad_norm": 0.10172853618860245, "learning_rate": 0.01, "loss": 2.0016, "step": 34587 }, { "epoch": 3.554254007398274, "grad_norm": 0.04822350665926933, "learning_rate": 0.01, "loss": 1.9796, "step": 34590 }, { "epoch": 3.554562268803946, "grad_norm": 0.05222393944859505, "learning_rate": 0.01, "loss": 2.0036, "step": 34593 }, { "epoch": 3.5548705302096177, "grad_norm": 0.03592358157038689, "learning_rate": 0.01, "loss": 1.9911, "step": 34596 }, { "epoch": 3.5551787916152895, "grad_norm": 0.03903461620211601, "learning_rate": 0.01, "loss": 2.0062, "step": 34599 }, { "epoch": 3.5554870530209617, "grad_norm": 0.0467611663043499, "learning_rate": 0.01, "loss": 1.995, "step": 34602 }, { "epoch": 3.555795314426634, "grad_norm": 0.12140758335590363, "learning_rate": 0.01, "loss": 2.0168, "step": 34605 }, { "epoch": 3.5561035758323056, "grad_norm": 0.04155382886528969, "learning_rate": 0.01, "loss": 2.0048, "step": 34608 }, { "epoch": 3.556411837237978, "grad_norm": 0.039924267679452896, "learning_rate": 0.01, "loss": 1.9813, "step": 34611 }, { "epoch": 3.55672009864365, "grad_norm": 0.10513463616371155, "learning_rate": 0.01, "loss": 2.0098, "step": 34614 }, { "epoch": 3.5570283600493218, "grad_norm": 0.08956603705883026, "learning_rate": 0.01, "loss": 1.9951, "step": 34617 }, { "epoch": 3.557336621454994, "grad_norm": 0.045444682240486145, "learning_rate": 0.01, "loss": 2.0041, "step": 34620 }, { "epoch": 3.5576448828606657, "grad_norm": 0.05367853119969368, "learning_rate": 0.01, "loss": 2.0154, "step": 34623 }, { "epoch": 3.557953144266338, "grad_norm": 0.0465155765414238, "learning_rate": 0.01, "loss": 2.0094, "step": 34626 }, { "epoch": 3.5582614056720097, "grad_norm": 0.06788338720798492, "learning_rate": 0.01, "loss": 2.0526, "step": 34629 }, { "epoch": 3.558569667077682, "grad_norm": 0.04508093744516373, "learning_rate": 0.01, "loss": 2.0205, "step": 34632 }, { "epoch": 3.558877928483354, "grad_norm": 0.05827740207314491, "learning_rate": 0.01, "loss": 2.0154, "step": 34635 }, { "epoch": 3.559186189889026, "grad_norm": 0.09915097057819366, "learning_rate": 0.01, "loss": 2.0322, "step": 34638 }, { "epoch": 3.559494451294698, "grad_norm": 0.08571092784404755, "learning_rate": 0.01, "loss": 2.0157, "step": 34641 }, { "epoch": 3.55980271270037, "grad_norm": 0.06480662524700165, "learning_rate": 0.01, "loss": 1.9991, "step": 34644 }, { "epoch": 3.560110974106042, "grad_norm": 0.055505797266960144, "learning_rate": 0.01, "loss": 1.9894, "step": 34647 }, { "epoch": 3.5604192355117137, "grad_norm": 0.05562606081366539, "learning_rate": 0.01, "loss": 2.0173, "step": 34650 }, { "epoch": 3.560727496917386, "grad_norm": 0.05720195546746254, "learning_rate": 0.01, "loss": 1.999, "step": 34653 }, { "epoch": 3.561035758323058, "grad_norm": 0.14654351770877838, "learning_rate": 0.01, "loss": 2.0, "step": 34656 }, { "epoch": 3.56134401972873, "grad_norm": 0.04594961181282997, "learning_rate": 0.01, "loss": 1.9979, "step": 34659 }, { "epoch": 3.561652281134402, "grad_norm": 0.052737195044755936, "learning_rate": 0.01, "loss": 2.0013, "step": 34662 }, { "epoch": 3.561960542540074, "grad_norm": 0.0722641721367836, "learning_rate": 0.01, "loss": 2.0165, "step": 34665 }, { "epoch": 3.562268803945746, "grad_norm": 0.03973691165447235, "learning_rate": 0.01, "loss": 2.0317, "step": 34668 }, { "epoch": 3.562577065351418, "grad_norm": 0.06284237653017044, "learning_rate": 0.01, "loss": 2.0112, "step": 34671 }, { "epoch": 3.56288532675709, "grad_norm": 0.0880017802119255, "learning_rate": 0.01, "loss": 2.0034, "step": 34674 }, { "epoch": 3.563193588162762, "grad_norm": 0.06454182416200638, "learning_rate": 0.01, "loss": 2.0068, "step": 34677 }, { "epoch": 3.563501849568434, "grad_norm": 0.07054916024208069, "learning_rate": 0.01, "loss": 1.9997, "step": 34680 }, { "epoch": 3.563810110974106, "grad_norm": 0.08058945089578629, "learning_rate": 0.01, "loss": 1.9834, "step": 34683 }, { "epoch": 3.5641183723797782, "grad_norm": 0.08660910278558731, "learning_rate": 0.01, "loss": 2.0159, "step": 34686 }, { "epoch": 3.56442663378545, "grad_norm": 0.050544124096632004, "learning_rate": 0.01, "loss": 1.9975, "step": 34689 }, { "epoch": 3.564734895191122, "grad_norm": 0.056985314935445786, "learning_rate": 0.01, "loss": 2.0013, "step": 34692 }, { "epoch": 3.565043156596794, "grad_norm": 0.08204400539398193, "learning_rate": 0.01, "loss": 1.993, "step": 34695 }, { "epoch": 3.565351418002466, "grad_norm": 0.09590046107769012, "learning_rate": 0.01, "loss": 1.9995, "step": 34698 }, { "epoch": 3.565659679408138, "grad_norm": 0.03453322499990463, "learning_rate": 0.01, "loss": 2.001, "step": 34701 }, { "epoch": 3.56596794081381, "grad_norm": 0.03801025450229645, "learning_rate": 0.01, "loss": 2.0096, "step": 34704 }, { "epoch": 3.5662762022194823, "grad_norm": 0.05342378839850426, "learning_rate": 0.01, "loss": 1.9924, "step": 34707 }, { "epoch": 3.566584463625154, "grad_norm": 0.06871719658374786, "learning_rate": 0.01, "loss": 2.0254, "step": 34710 }, { "epoch": 3.566892725030826, "grad_norm": 0.06653191894292831, "learning_rate": 0.01, "loss": 1.9956, "step": 34713 }, { "epoch": 3.5672009864364984, "grad_norm": 0.10531385242938995, "learning_rate": 0.01, "loss": 2.0151, "step": 34716 }, { "epoch": 3.56750924784217, "grad_norm": 0.05481969192624092, "learning_rate": 0.01, "loss": 2.003, "step": 34719 }, { "epoch": 3.567817509247842, "grad_norm": 0.04904542118310928, "learning_rate": 0.01, "loss": 1.9992, "step": 34722 }, { "epoch": 3.568125770653514, "grad_norm": 0.034647226333618164, "learning_rate": 0.01, "loss": 2.0232, "step": 34725 }, { "epoch": 3.5684340320591863, "grad_norm": 0.07234811037778854, "learning_rate": 0.01, "loss": 1.9992, "step": 34728 }, { "epoch": 3.568742293464858, "grad_norm": 0.10623595118522644, "learning_rate": 0.01, "loss": 1.9955, "step": 34731 }, { "epoch": 3.5690505548705302, "grad_norm": 0.04955185204744339, "learning_rate": 0.01, "loss": 1.9894, "step": 34734 }, { "epoch": 3.5693588162762024, "grad_norm": 0.08183744549751282, "learning_rate": 0.01, "loss": 2.0236, "step": 34737 }, { "epoch": 3.569667077681874, "grad_norm": 0.04063693434000015, "learning_rate": 0.01, "loss": 1.9739, "step": 34740 }, { "epoch": 3.5699753390875464, "grad_norm": 0.05661273002624512, "learning_rate": 0.01, "loss": 2.0019, "step": 34743 }, { "epoch": 3.570283600493218, "grad_norm": 0.03300248831510544, "learning_rate": 0.01, "loss": 1.9687, "step": 34746 }, { "epoch": 3.5705918618988903, "grad_norm": 0.09492766112089157, "learning_rate": 0.01, "loss": 2.0099, "step": 34749 }, { "epoch": 3.570900123304562, "grad_norm": 0.0505099892616272, "learning_rate": 0.01, "loss": 2.0062, "step": 34752 }, { "epoch": 3.5712083847102343, "grad_norm": 0.094505175948143, "learning_rate": 0.01, "loss": 1.9948, "step": 34755 }, { "epoch": 3.5715166461159065, "grad_norm": 0.046727851033210754, "learning_rate": 0.01, "loss": 2.0081, "step": 34758 }, { "epoch": 3.571824907521578, "grad_norm": 0.06659562885761261, "learning_rate": 0.01, "loss": 2.0026, "step": 34761 }, { "epoch": 3.5721331689272504, "grad_norm": 0.0828694999217987, "learning_rate": 0.01, "loss": 1.9923, "step": 34764 }, { "epoch": 3.572441430332922, "grad_norm": 0.12309973686933517, "learning_rate": 0.01, "loss": 2.0145, "step": 34767 }, { "epoch": 3.5727496917385944, "grad_norm": 0.03831718862056732, "learning_rate": 0.01, "loss": 2.0022, "step": 34770 }, { "epoch": 3.573057953144266, "grad_norm": 0.049565766006708145, "learning_rate": 0.01, "loss": 1.9838, "step": 34773 }, { "epoch": 3.5733662145499383, "grad_norm": 0.055259205400943756, "learning_rate": 0.01, "loss": 2.0103, "step": 34776 }, { "epoch": 3.5736744759556105, "grad_norm": 0.08979593217372894, "learning_rate": 0.01, "loss": 1.9915, "step": 34779 }, { "epoch": 3.5739827373612822, "grad_norm": 0.05166800692677498, "learning_rate": 0.01, "loss": 2.005, "step": 34782 }, { "epoch": 3.5742909987669544, "grad_norm": 0.06904742866754532, "learning_rate": 0.01, "loss": 1.9823, "step": 34785 }, { "epoch": 3.5745992601726266, "grad_norm": 0.0667278841137886, "learning_rate": 0.01, "loss": 1.9862, "step": 34788 }, { "epoch": 3.5749075215782984, "grad_norm": 0.03483438491821289, "learning_rate": 0.01, "loss": 2.0044, "step": 34791 }, { "epoch": 3.57521578298397, "grad_norm": 0.08964372426271439, "learning_rate": 0.01, "loss": 1.9828, "step": 34794 }, { "epoch": 3.5755240443896423, "grad_norm": 0.09426937997341156, "learning_rate": 0.01, "loss": 2.0093, "step": 34797 }, { "epoch": 3.5758323057953145, "grad_norm": 0.0574275478720665, "learning_rate": 0.01, "loss": 2.0269, "step": 34800 }, { "epoch": 3.5761405672009863, "grad_norm": 0.03654215484857559, "learning_rate": 0.01, "loss": 1.9876, "step": 34803 }, { "epoch": 3.5764488286066585, "grad_norm": 0.05129201337695122, "learning_rate": 0.01, "loss": 2.0072, "step": 34806 }, { "epoch": 3.5767570900123307, "grad_norm": 0.07386317104101181, "learning_rate": 0.01, "loss": 2.0074, "step": 34809 }, { "epoch": 3.5770653514180024, "grad_norm": 0.1415167599916458, "learning_rate": 0.01, "loss": 2.0036, "step": 34812 }, { "epoch": 3.5773736128236746, "grad_norm": 0.04465651139616966, "learning_rate": 0.01, "loss": 1.9881, "step": 34815 }, { "epoch": 3.5776818742293464, "grad_norm": 0.05919940024614334, "learning_rate": 0.01, "loss": 2.0257, "step": 34818 }, { "epoch": 3.5779901356350186, "grad_norm": 0.03873635083436966, "learning_rate": 0.01, "loss": 2.0034, "step": 34821 }, { "epoch": 3.5782983970406903, "grad_norm": 0.05425990745425224, "learning_rate": 0.01, "loss": 1.9911, "step": 34824 }, { "epoch": 3.5786066584463625, "grad_norm": 0.11823546886444092, "learning_rate": 0.01, "loss": 1.9999, "step": 34827 }, { "epoch": 3.5789149198520347, "grad_norm": 0.12624318897724152, "learning_rate": 0.01, "loss": 2.0124, "step": 34830 }, { "epoch": 3.5792231812577064, "grad_norm": 0.05450008437037468, "learning_rate": 0.01, "loss": 1.9941, "step": 34833 }, { "epoch": 3.5795314426633786, "grad_norm": 0.04888772964477539, "learning_rate": 0.01, "loss": 2.007, "step": 34836 }, { "epoch": 3.579839704069051, "grad_norm": 0.033485714346170425, "learning_rate": 0.01, "loss": 2.0066, "step": 34839 }, { "epoch": 3.5801479654747226, "grad_norm": 0.08570647239685059, "learning_rate": 0.01, "loss": 2.0157, "step": 34842 }, { "epoch": 3.5804562268803943, "grad_norm": 0.06110506132245064, "learning_rate": 0.01, "loss": 2.0054, "step": 34845 }, { "epoch": 3.5807644882860665, "grad_norm": 0.08815490454435349, "learning_rate": 0.01, "loss": 1.9991, "step": 34848 }, { "epoch": 3.5810727496917387, "grad_norm": 0.09731253236532211, "learning_rate": 0.01, "loss": 1.9763, "step": 34851 }, { "epoch": 3.5813810110974105, "grad_norm": 0.08022457361221313, "learning_rate": 0.01, "loss": 2.0095, "step": 34854 }, { "epoch": 3.5816892725030827, "grad_norm": 0.042772453278303146, "learning_rate": 0.01, "loss": 2.0041, "step": 34857 }, { "epoch": 3.581997533908755, "grad_norm": 0.030573785305023193, "learning_rate": 0.01, "loss": 1.9908, "step": 34860 }, { "epoch": 3.5823057953144266, "grad_norm": 0.05686675012111664, "learning_rate": 0.01, "loss": 2.0131, "step": 34863 }, { "epoch": 3.582614056720099, "grad_norm": 0.04291863366961479, "learning_rate": 0.01, "loss": 1.9925, "step": 34866 }, { "epoch": 3.5829223181257706, "grad_norm": 0.0715685561299324, "learning_rate": 0.01, "loss": 2.0052, "step": 34869 }, { "epoch": 3.5832305795314427, "grad_norm": 0.06964897364377975, "learning_rate": 0.01, "loss": 2.0019, "step": 34872 }, { "epoch": 3.5835388409371145, "grad_norm": 0.043709173798561096, "learning_rate": 0.01, "loss": 1.9957, "step": 34875 }, { "epoch": 3.5838471023427867, "grad_norm": 0.06593792140483856, "learning_rate": 0.01, "loss": 2.0034, "step": 34878 }, { "epoch": 3.584155363748459, "grad_norm": 0.04702428728342056, "learning_rate": 0.01, "loss": 2.0174, "step": 34881 }, { "epoch": 3.5844636251541306, "grad_norm": 0.08917208015918732, "learning_rate": 0.01, "loss": 1.9914, "step": 34884 }, { "epoch": 3.584771886559803, "grad_norm": 0.1167001873254776, "learning_rate": 0.01, "loss": 2.0183, "step": 34887 }, { "epoch": 3.5850801479654746, "grad_norm": 0.07775711268186569, "learning_rate": 0.01, "loss": 2.0254, "step": 34890 }, { "epoch": 3.585388409371147, "grad_norm": 0.08915986865758896, "learning_rate": 0.01, "loss": 1.9832, "step": 34893 }, { "epoch": 3.5856966707768185, "grad_norm": 0.045163143426179886, "learning_rate": 0.01, "loss": 2.0024, "step": 34896 }, { "epoch": 3.5860049321824907, "grad_norm": 0.0571911595761776, "learning_rate": 0.01, "loss": 1.9897, "step": 34899 }, { "epoch": 3.586313193588163, "grad_norm": 0.0707104504108429, "learning_rate": 0.01, "loss": 2.0291, "step": 34902 }, { "epoch": 3.5866214549938347, "grad_norm": 0.04241738095879555, "learning_rate": 0.01, "loss": 1.9858, "step": 34905 }, { "epoch": 3.586929716399507, "grad_norm": 0.045163869857788086, "learning_rate": 0.01, "loss": 2.004, "step": 34908 }, { "epoch": 3.587237977805179, "grad_norm": 0.03191215172410011, "learning_rate": 0.01, "loss": 2.017, "step": 34911 }, { "epoch": 3.587546239210851, "grad_norm": 0.033771395683288574, "learning_rate": 0.01, "loss": 2.0137, "step": 34914 }, { "epoch": 3.5878545006165226, "grad_norm": 0.06919904053211212, "learning_rate": 0.01, "loss": 1.9974, "step": 34917 }, { "epoch": 3.5881627620221948, "grad_norm": 0.05089094117283821, "learning_rate": 0.01, "loss": 2.0174, "step": 34920 }, { "epoch": 3.588471023427867, "grad_norm": 0.10625668615102768, "learning_rate": 0.01, "loss": 2.0118, "step": 34923 }, { "epoch": 3.5887792848335387, "grad_norm": 0.05255312845110893, "learning_rate": 0.01, "loss": 1.9957, "step": 34926 }, { "epoch": 3.589087546239211, "grad_norm": 0.09395511448383331, "learning_rate": 0.01, "loss": 1.986, "step": 34929 }, { "epoch": 3.589395807644883, "grad_norm": 0.04209842532873154, "learning_rate": 0.01, "loss": 1.9993, "step": 34932 }, { "epoch": 3.589704069050555, "grad_norm": 0.0340830534696579, "learning_rate": 0.01, "loss": 2.036, "step": 34935 }, { "epoch": 3.590012330456227, "grad_norm": 0.041663311421871185, "learning_rate": 0.01, "loss": 1.9921, "step": 34938 }, { "epoch": 3.590320591861899, "grad_norm": 0.044666144996881485, "learning_rate": 0.01, "loss": 2.0027, "step": 34941 }, { "epoch": 3.590628853267571, "grad_norm": 0.05369654670357704, "learning_rate": 0.01, "loss": 2.0291, "step": 34944 }, { "epoch": 3.5909371146732427, "grad_norm": 0.054408960044384, "learning_rate": 0.01, "loss": 1.9996, "step": 34947 }, { "epoch": 3.591245376078915, "grad_norm": 0.09313920885324478, "learning_rate": 0.01, "loss": 1.9848, "step": 34950 }, { "epoch": 3.591553637484587, "grad_norm": 0.05207030475139618, "learning_rate": 0.01, "loss": 1.9925, "step": 34953 }, { "epoch": 3.591861898890259, "grad_norm": 0.06822825968265533, "learning_rate": 0.01, "loss": 2.0096, "step": 34956 }, { "epoch": 3.592170160295931, "grad_norm": 0.05392748489975929, "learning_rate": 0.01, "loss": 2.0129, "step": 34959 }, { "epoch": 3.592478421701603, "grad_norm": 0.06300345808267593, "learning_rate": 0.01, "loss": 2.0259, "step": 34962 }, { "epoch": 3.592786683107275, "grad_norm": 0.09526319801807404, "learning_rate": 0.01, "loss": 1.982, "step": 34965 }, { "epoch": 3.5930949445129468, "grad_norm": 0.060867104679346085, "learning_rate": 0.01, "loss": 2.0216, "step": 34968 }, { "epoch": 3.593403205918619, "grad_norm": 0.09763433039188385, "learning_rate": 0.01, "loss": 2.0151, "step": 34971 }, { "epoch": 3.593711467324291, "grad_norm": 0.05499181151390076, "learning_rate": 0.01, "loss": 2.0221, "step": 34974 }, { "epoch": 3.594019728729963, "grad_norm": 0.18828648328781128, "learning_rate": 0.01, "loss": 2.0094, "step": 34977 }, { "epoch": 3.594327990135635, "grad_norm": 0.1367756426334381, "learning_rate": 0.01, "loss": 1.9861, "step": 34980 }, { "epoch": 3.5946362515413073, "grad_norm": 0.0752996876835823, "learning_rate": 0.01, "loss": 1.9973, "step": 34983 }, { "epoch": 3.594944512946979, "grad_norm": 0.07500961422920227, "learning_rate": 0.01, "loss": 2.0196, "step": 34986 }, { "epoch": 3.595252774352651, "grad_norm": 0.08114928752183914, "learning_rate": 0.01, "loss": 1.9768, "step": 34989 }, { "epoch": 3.595561035758323, "grad_norm": 0.0735306516289711, "learning_rate": 0.01, "loss": 2.0046, "step": 34992 }, { "epoch": 3.595869297163995, "grad_norm": 0.04386765882372856, "learning_rate": 0.01, "loss": 2.0095, "step": 34995 }, { "epoch": 3.596177558569667, "grad_norm": 0.052437882870435715, "learning_rate": 0.01, "loss": 2.0008, "step": 34998 }, { "epoch": 3.596485819975339, "grad_norm": 0.044234346598386765, "learning_rate": 0.01, "loss": 2.0145, "step": 35001 }, { "epoch": 3.5967940813810113, "grad_norm": 0.04302847385406494, "learning_rate": 0.01, "loss": 2.0284, "step": 35004 }, { "epoch": 3.597102342786683, "grad_norm": 0.047404494136571884, "learning_rate": 0.01, "loss": 1.9965, "step": 35007 }, { "epoch": 3.5974106041923553, "grad_norm": 0.04339216649532318, "learning_rate": 0.01, "loss": 1.9888, "step": 35010 }, { "epoch": 3.597718865598027, "grad_norm": 0.07121206820011139, "learning_rate": 0.01, "loss": 1.9935, "step": 35013 }, { "epoch": 3.598027127003699, "grad_norm": 0.06314744055271149, "learning_rate": 0.01, "loss": 2.0062, "step": 35016 }, { "epoch": 3.598335388409371, "grad_norm": 0.0557650588452816, "learning_rate": 0.01, "loss": 1.9947, "step": 35019 }, { "epoch": 3.598643649815043, "grad_norm": 0.043066419661045074, "learning_rate": 0.01, "loss": 2.0061, "step": 35022 }, { "epoch": 3.5989519112207153, "grad_norm": 0.0739695131778717, "learning_rate": 0.01, "loss": 2.0139, "step": 35025 }, { "epoch": 3.599260172626387, "grad_norm": 0.13933037221431732, "learning_rate": 0.01, "loss": 1.9982, "step": 35028 }, { "epoch": 3.5995684340320593, "grad_norm": 0.03341522440314293, "learning_rate": 0.01, "loss": 1.9969, "step": 35031 }, { "epoch": 3.599876695437731, "grad_norm": 0.07127437740564346, "learning_rate": 0.01, "loss": 1.9988, "step": 35034 }, { "epoch": 3.6001849568434032, "grad_norm": 0.05078571289777756, "learning_rate": 0.01, "loss": 1.9961, "step": 35037 }, { "epoch": 3.600493218249075, "grad_norm": 0.049321308732032776, "learning_rate": 0.01, "loss": 2.0083, "step": 35040 }, { "epoch": 3.600801479654747, "grad_norm": 0.04840512201189995, "learning_rate": 0.01, "loss": 2.0086, "step": 35043 }, { "epoch": 3.6011097410604194, "grad_norm": 0.05541226640343666, "learning_rate": 0.01, "loss": 1.9975, "step": 35046 }, { "epoch": 3.601418002466091, "grad_norm": 0.03456197306513786, "learning_rate": 0.01, "loss": 2.0024, "step": 35049 }, { "epoch": 3.6017262638717633, "grad_norm": 0.0342303030192852, "learning_rate": 0.01, "loss": 2.0113, "step": 35052 }, { "epoch": 3.6020345252774355, "grad_norm": 0.043709222227334976, "learning_rate": 0.01, "loss": 1.9818, "step": 35055 }, { "epoch": 3.6023427866831073, "grad_norm": 0.05025354400277138, "learning_rate": 0.01, "loss": 1.9958, "step": 35058 }, { "epoch": 3.6026510480887795, "grad_norm": 0.0903453454375267, "learning_rate": 0.01, "loss": 1.9858, "step": 35061 }, { "epoch": 3.602959309494451, "grad_norm": 0.029984181746840477, "learning_rate": 0.01, "loss": 2.0049, "step": 35064 }, { "epoch": 3.6032675709001234, "grad_norm": 0.03956552594900131, "learning_rate": 0.01, "loss": 1.9821, "step": 35067 }, { "epoch": 3.603575832305795, "grad_norm": 0.0535871721804142, "learning_rate": 0.01, "loss": 2.0002, "step": 35070 }, { "epoch": 3.6038840937114673, "grad_norm": 0.07621357589960098, "learning_rate": 0.01, "loss": 1.9978, "step": 35073 }, { "epoch": 3.6041923551171395, "grad_norm": 0.08122174441814423, "learning_rate": 0.01, "loss": 2.0012, "step": 35076 }, { "epoch": 3.6045006165228113, "grad_norm": 0.04478932544589043, "learning_rate": 0.01, "loss": 2.0016, "step": 35079 }, { "epoch": 3.6048088779284835, "grad_norm": 0.07432077080011368, "learning_rate": 0.01, "loss": 1.9881, "step": 35082 }, { "epoch": 3.6051171393341552, "grad_norm": 0.08805304020643234, "learning_rate": 0.01, "loss": 2.0014, "step": 35085 }, { "epoch": 3.6054254007398274, "grad_norm": 0.15478205680847168, "learning_rate": 0.01, "loss": 1.9907, "step": 35088 }, { "epoch": 3.605733662145499, "grad_norm": 0.11871577054262161, "learning_rate": 0.01, "loss": 2.0125, "step": 35091 }, { "epoch": 3.6060419235511714, "grad_norm": 0.06366606056690216, "learning_rate": 0.01, "loss": 2.0011, "step": 35094 }, { "epoch": 3.6063501849568436, "grad_norm": 0.10421419143676758, "learning_rate": 0.01, "loss": 1.9947, "step": 35097 }, { "epoch": 3.6066584463625153, "grad_norm": 0.08190574496984482, "learning_rate": 0.01, "loss": 1.9893, "step": 35100 }, { "epoch": 3.6069667077681875, "grad_norm": 0.03852493688464165, "learning_rate": 0.01, "loss": 2.0112, "step": 35103 }, { "epoch": 3.6072749691738597, "grad_norm": 0.04792521148920059, "learning_rate": 0.01, "loss": 2.0063, "step": 35106 }, { "epoch": 3.6075832305795315, "grad_norm": 0.04788130521774292, "learning_rate": 0.01, "loss": 2.0138, "step": 35109 }, { "epoch": 3.607891491985203, "grad_norm": 0.06820268929004669, "learning_rate": 0.01, "loss": 1.9813, "step": 35112 }, { "epoch": 3.6081997533908754, "grad_norm": 0.05702923610806465, "learning_rate": 0.01, "loss": 2.0261, "step": 35115 }, { "epoch": 3.6085080147965476, "grad_norm": 0.10051339119672775, "learning_rate": 0.01, "loss": 1.9805, "step": 35118 }, { "epoch": 3.6088162762022193, "grad_norm": 0.07067910581827164, "learning_rate": 0.01, "loss": 2.0049, "step": 35121 }, { "epoch": 3.6091245376078915, "grad_norm": 0.03547900170087814, "learning_rate": 0.01, "loss": 1.9887, "step": 35124 }, { "epoch": 3.6094327990135637, "grad_norm": 0.062315478920936584, "learning_rate": 0.01, "loss": 2.0035, "step": 35127 }, { "epoch": 3.6097410604192355, "grad_norm": 0.12956464290618896, "learning_rate": 0.01, "loss": 2.0106, "step": 35130 }, { "epoch": 3.6100493218249077, "grad_norm": 0.1570357233285904, "learning_rate": 0.01, "loss": 2.0408, "step": 35133 }, { "epoch": 3.6103575832305794, "grad_norm": 0.06165264546871185, "learning_rate": 0.01, "loss": 2.0178, "step": 35136 }, { "epoch": 3.6106658446362516, "grad_norm": 0.046166177839040756, "learning_rate": 0.01, "loss": 2.0237, "step": 35139 }, { "epoch": 3.6109741060419234, "grad_norm": 0.052293550223112106, "learning_rate": 0.01, "loss": 1.9686, "step": 35142 }, { "epoch": 3.6112823674475956, "grad_norm": 0.046433527022600174, "learning_rate": 0.01, "loss": 1.9964, "step": 35145 }, { "epoch": 3.6115906288532678, "grad_norm": 0.029988888651132584, "learning_rate": 0.01, "loss": 1.9536, "step": 35148 }, { "epoch": 3.6118988902589395, "grad_norm": 0.05726629123091698, "learning_rate": 0.01, "loss": 2.0312, "step": 35151 }, { "epoch": 3.6122071516646117, "grad_norm": 0.054002657532691956, "learning_rate": 0.01, "loss": 1.9853, "step": 35154 }, { "epoch": 3.6125154130702835, "grad_norm": 0.05368361249566078, "learning_rate": 0.01, "loss": 1.978, "step": 35157 }, { "epoch": 3.6128236744759556, "grad_norm": 0.04732634872198105, "learning_rate": 0.01, "loss": 1.9876, "step": 35160 }, { "epoch": 3.6131319358816274, "grad_norm": 0.05976017564535141, "learning_rate": 0.01, "loss": 1.9695, "step": 35163 }, { "epoch": 3.6134401972872996, "grad_norm": 0.04517058655619621, "learning_rate": 0.01, "loss": 2.0137, "step": 35166 }, { "epoch": 3.613748458692972, "grad_norm": 0.12990356981754303, "learning_rate": 0.01, "loss": 2.0157, "step": 35169 }, { "epoch": 3.6140567200986435, "grad_norm": 0.059477876871824265, "learning_rate": 0.01, "loss": 1.9885, "step": 35172 }, { "epoch": 3.6143649815043157, "grad_norm": 0.05780341103672981, "learning_rate": 0.01, "loss": 2.0084, "step": 35175 }, { "epoch": 3.614673242909988, "grad_norm": 0.03971264883875847, "learning_rate": 0.01, "loss": 2.0061, "step": 35178 }, { "epoch": 3.6149815043156597, "grad_norm": 0.04069376364350319, "learning_rate": 0.01, "loss": 2.0013, "step": 35181 }, { "epoch": 3.6152897657213314, "grad_norm": 0.049401164054870605, "learning_rate": 0.01, "loss": 1.9785, "step": 35184 }, { "epoch": 3.6155980271270036, "grad_norm": 0.058909036219120026, "learning_rate": 0.01, "loss": 1.9979, "step": 35187 }, { "epoch": 3.615906288532676, "grad_norm": 0.1364602893590927, "learning_rate": 0.01, "loss": 2.0126, "step": 35190 }, { "epoch": 3.6162145499383476, "grad_norm": 0.0892588272690773, "learning_rate": 0.01, "loss": 1.9942, "step": 35193 }, { "epoch": 3.6165228113440198, "grad_norm": 0.03974326699972153, "learning_rate": 0.01, "loss": 2.0159, "step": 35196 }, { "epoch": 3.616831072749692, "grad_norm": 0.03898739442229271, "learning_rate": 0.01, "loss": 2.0303, "step": 35199 }, { "epoch": 3.6171393341553637, "grad_norm": 0.044964905828237534, "learning_rate": 0.01, "loss": 2.015, "step": 35202 }, { "epoch": 3.617447595561036, "grad_norm": 0.11181700229644775, "learning_rate": 0.01, "loss": 1.996, "step": 35205 }, { "epoch": 3.6177558569667077, "grad_norm": 0.0697384923696518, "learning_rate": 0.01, "loss": 2.0091, "step": 35208 }, { "epoch": 3.61806411837238, "grad_norm": 0.03923085704445839, "learning_rate": 0.01, "loss": 1.9976, "step": 35211 }, { "epoch": 3.6183723797780516, "grad_norm": 0.03635834902524948, "learning_rate": 0.01, "loss": 2.0082, "step": 35214 }, { "epoch": 3.618680641183724, "grad_norm": 0.05013216286897659, "learning_rate": 0.01, "loss": 2.0005, "step": 35217 }, { "epoch": 3.618988902589396, "grad_norm": 0.03531458228826523, "learning_rate": 0.01, "loss": 2.0256, "step": 35220 }, { "epoch": 3.6192971639950677, "grad_norm": 0.09228625893592834, "learning_rate": 0.01, "loss": 1.9941, "step": 35223 }, { "epoch": 3.61960542540074, "grad_norm": 0.06587129831314087, "learning_rate": 0.01, "loss": 2.0179, "step": 35226 }, { "epoch": 3.6199136868064117, "grad_norm": 0.11610520631074905, "learning_rate": 0.01, "loss": 2.0364, "step": 35229 }, { "epoch": 3.620221948212084, "grad_norm": 0.05594666674733162, "learning_rate": 0.01, "loss": 1.9921, "step": 35232 }, { "epoch": 3.6205302096177556, "grad_norm": 0.04040682688355446, "learning_rate": 0.01, "loss": 2.0213, "step": 35235 }, { "epoch": 3.620838471023428, "grad_norm": 0.038559917360544205, "learning_rate": 0.01, "loss": 1.9874, "step": 35238 }, { "epoch": 3.6211467324291, "grad_norm": 0.05012970045208931, "learning_rate": 0.01, "loss": 1.9959, "step": 35241 }, { "epoch": 3.6214549938347718, "grad_norm": 0.09769418835639954, "learning_rate": 0.01, "loss": 2.0246, "step": 35244 }, { "epoch": 3.621763255240444, "grad_norm": 0.07791107892990112, "learning_rate": 0.01, "loss": 2.0021, "step": 35247 }, { "epoch": 3.622071516646116, "grad_norm": 0.06556175649166107, "learning_rate": 0.01, "loss": 2.0175, "step": 35250 }, { "epoch": 3.622379778051788, "grad_norm": 0.04608479142189026, "learning_rate": 0.01, "loss": 2.0082, "step": 35253 }, { "epoch": 3.6226880394574597, "grad_norm": 0.03681867569684982, "learning_rate": 0.01, "loss": 2.0005, "step": 35256 }, { "epoch": 3.622996300863132, "grad_norm": 0.05940413847565651, "learning_rate": 0.01, "loss": 1.9957, "step": 35259 }, { "epoch": 3.623304562268804, "grad_norm": 0.09911686182022095, "learning_rate": 0.01, "loss": 1.9991, "step": 35262 }, { "epoch": 3.623612823674476, "grad_norm": 0.03739270940423012, "learning_rate": 0.01, "loss": 2.0285, "step": 35265 }, { "epoch": 3.623921085080148, "grad_norm": 0.11673395335674286, "learning_rate": 0.01, "loss": 2.0093, "step": 35268 }, { "epoch": 3.62422934648582, "grad_norm": 0.0795954018831253, "learning_rate": 0.01, "loss": 1.9954, "step": 35271 }, { "epoch": 3.624537607891492, "grad_norm": 0.04180069640278816, "learning_rate": 0.01, "loss": 1.9844, "step": 35274 }, { "epoch": 3.624845869297164, "grad_norm": 0.09516190737485886, "learning_rate": 0.01, "loss": 2.0031, "step": 35277 }, { "epoch": 3.625154130702836, "grad_norm": 0.11448989808559418, "learning_rate": 0.01, "loss": 2.0236, "step": 35280 }, { "epoch": 3.625462392108508, "grad_norm": 0.06945902854204178, "learning_rate": 0.01, "loss": 2.0211, "step": 35283 }, { "epoch": 3.62577065351418, "grad_norm": 0.039453648030757904, "learning_rate": 0.01, "loss": 2.0038, "step": 35286 }, { "epoch": 3.626078914919852, "grad_norm": 0.06266641616821289, "learning_rate": 0.01, "loss": 2.0267, "step": 35289 }, { "epoch": 3.626387176325524, "grad_norm": 0.04908052831888199, "learning_rate": 0.01, "loss": 1.9993, "step": 35292 }, { "epoch": 3.626695437731196, "grad_norm": 0.06186684966087341, "learning_rate": 0.01, "loss": 1.9813, "step": 35295 }, { "epoch": 3.627003699136868, "grad_norm": 0.13145780563354492, "learning_rate": 0.01, "loss": 1.9967, "step": 35298 }, { "epoch": 3.6273119605425403, "grad_norm": 0.05850294232368469, "learning_rate": 0.01, "loss": 1.9963, "step": 35301 }, { "epoch": 3.627620221948212, "grad_norm": 0.04934421926736832, "learning_rate": 0.01, "loss": 1.9952, "step": 35304 }, { "epoch": 3.627928483353884, "grad_norm": 0.07607220858335495, "learning_rate": 0.01, "loss": 2.0091, "step": 35307 }, { "epoch": 3.628236744759556, "grad_norm": 0.046615466475486755, "learning_rate": 0.01, "loss": 2.0037, "step": 35310 }, { "epoch": 3.6285450061652282, "grad_norm": 0.11176659911870956, "learning_rate": 0.01, "loss": 2.0077, "step": 35313 }, { "epoch": 3.6288532675709, "grad_norm": 0.07844175398349762, "learning_rate": 0.01, "loss": 2.0198, "step": 35316 }, { "epoch": 3.629161528976572, "grad_norm": 0.0730755403637886, "learning_rate": 0.01, "loss": 2.0014, "step": 35319 }, { "epoch": 3.6294697903822444, "grad_norm": 0.08503863960504532, "learning_rate": 0.01, "loss": 2.005, "step": 35322 }, { "epoch": 3.629778051787916, "grad_norm": 0.03936958312988281, "learning_rate": 0.01, "loss": 2.0211, "step": 35325 }, { "epoch": 3.6300863131935883, "grad_norm": 0.11586350947618484, "learning_rate": 0.01, "loss": 2.0054, "step": 35328 }, { "epoch": 3.63039457459926, "grad_norm": 0.06128397583961487, "learning_rate": 0.01, "loss": 2.0076, "step": 35331 }, { "epoch": 3.6307028360049323, "grad_norm": 0.04872961342334747, "learning_rate": 0.01, "loss": 2.017, "step": 35334 }, { "epoch": 3.631011097410604, "grad_norm": 0.08416412770748138, "learning_rate": 0.01, "loss": 2.0144, "step": 35337 }, { "epoch": 3.631319358816276, "grad_norm": 0.05844739452004433, "learning_rate": 0.01, "loss": 2.001, "step": 35340 }, { "epoch": 3.6316276202219484, "grad_norm": 0.08564214408397675, "learning_rate": 0.01, "loss": 2.0061, "step": 35343 }, { "epoch": 3.63193588162762, "grad_norm": 0.08769746124744415, "learning_rate": 0.01, "loss": 1.9914, "step": 35346 }, { "epoch": 3.6322441430332923, "grad_norm": 0.03819827735424042, "learning_rate": 0.01, "loss": 2.0103, "step": 35349 }, { "epoch": 3.632552404438964, "grad_norm": 0.12311212718486786, "learning_rate": 0.01, "loss": 1.9921, "step": 35352 }, { "epoch": 3.6328606658446363, "grad_norm": 0.037630997598171234, "learning_rate": 0.01, "loss": 2.007, "step": 35355 }, { "epoch": 3.633168927250308, "grad_norm": 0.08739642798900604, "learning_rate": 0.01, "loss": 2.0113, "step": 35358 }, { "epoch": 3.6334771886559802, "grad_norm": 0.04889992997050285, "learning_rate": 0.01, "loss": 2.0098, "step": 35361 }, { "epoch": 3.6337854500616524, "grad_norm": 0.035435836762189865, "learning_rate": 0.01, "loss": 2.0045, "step": 35364 }, { "epoch": 3.634093711467324, "grad_norm": 0.059248197823762894, "learning_rate": 0.01, "loss": 2.0159, "step": 35367 }, { "epoch": 3.6344019728729964, "grad_norm": 0.08831764757633209, "learning_rate": 0.01, "loss": 1.992, "step": 35370 }, { "epoch": 3.6347102342786686, "grad_norm": 0.14940893650054932, "learning_rate": 0.01, "loss": 2.0051, "step": 35373 }, { "epoch": 3.6350184956843403, "grad_norm": 0.06616901606321335, "learning_rate": 0.01, "loss": 1.9825, "step": 35376 }, { "epoch": 3.635326757090012, "grad_norm": 0.06391241401433945, "learning_rate": 0.01, "loss": 1.9636, "step": 35379 }, { "epoch": 3.6356350184956843, "grad_norm": 0.0580880343914032, "learning_rate": 0.01, "loss": 1.975, "step": 35382 }, { "epoch": 3.6359432799013565, "grad_norm": 0.057876862585544586, "learning_rate": 0.01, "loss": 1.9888, "step": 35385 }, { "epoch": 3.636251541307028, "grad_norm": 0.044347915798425674, "learning_rate": 0.01, "loss": 2.0051, "step": 35388 }, { "epoch": 3.6365598027127004, "grad_norm": 0.0400017574429512, "learning_rate": 0.01, "loss": 1.9722, "step": 35391 }, { "epoch": 3.6368680641183726, "grad_norm": 0.11369085311889648, "learning_rate": 0.01, "loss": 1.9855, "step": 35394 }, { "epoch": 3.6371763255240444, "grad_norm": 0.10995481163263321, "learning_rate": 0.01, "loss": 2.0037, "step": 35397 }, { "epoch": 3.6374845869297165, "grad_norm": 0.05769447609782219, "learning_rate": 0.01, "loss": 2.0109, "step": 35400 }, { "epoch": 3.6377928483353883, "grad_norm": 0.07872482389211655, "learning_rate": 0.01, "loss": 2.0101, "step": 35403 }, { "epoch": 3.6381011097410605, "grad_norm": 0.05196002870798111, "learning_rate": 0.01, "loss": 2.0108, "step": 35406 }, { "epoch": 3.6384093711467322, "grad_norm": 0.0493265725672245, "learning_rate": 0.01, "loss": 2.0283, "step": 35409 }, { "epoch": 3.6387176325524044, "grad_norm": 0.04237900674343109, "learning_rate": 0.01, "loss": 2.0216, "step": 35412 }, { "epoch": 3.6390258939580766, "grad_norm": 0.06882897764444351, "learning_rate": 0.01, "loss": 2.0073, "step": 35415 }, { "epoch": 3.6393341553637484, "grad_norm": 0.058774422854185104, "learning_rate": 0.01, "loss": 2.0114, "step": 35418 }, { "epoch": 3.6396424167694206, "grad_norm": 0.10032794624567032, "learning_rate": 0.01, "loss": 2.0008, "step": 35421 }, { "epoch": 3.6399506781750923, "grad_norm": 0.042988426983356476, "learning_rate": 0.01, "loss": 2.0311, "step": 35424 }, { "epoch": 3.6402589395807645, "grad_norm": 0.055391326546669006, "learning_rate": 0.01, "loss": 2.0019, "step": 35427 }, { "epoch": 3.6405672009864363, "grad_norm": 0.03977194428443909, "learning_rate": 0.01, "loss": 1.9992, "step": 35430 }, { "epoch": 3.6408754623921085, "grad_norm": 0.0903034657239914, "learning_rate": 0.01, "loss": 2.0207, "step": 35433 }, { "epoch": 3.6411837237977807, "grad_norm": 0.04610143229365349, "learning_rate": 0.01, "loss": 2.0015, "step": 35436 }, { "epoch": 3.6414919852034524, "grad_norm": 0.08295582234859467, "learning_rate": 0.01, "loss": 1.9848, "step": 35439 }, { "epoch": 3.6418002466091246, "grad_norm": 0.09561655670404434, "learning_rate": 0.01, "loss": 1.9931, "step": 35442 }, { "epoch": 3.642108508014797, "grad_norm": 0.07968761771917343, "learning_rate": 0.01, "loss": 1.9925, "step": 35445 }, { "epoch": 3.6424167694204685, "grad_norm": 0.04458128660917282, "learning_rate": 0.01, "loss": 2.0112, "step": 35448 }, { "epoch": 3.6427250308261403, "grad_norm": 0.12892895936965942, "learning_rate": 0.01, "loss": 2.0004, "step": 35451 }, { "epoch": 3.6430332922318125, "grad_norm": 0.1063292846083641, "learning_rate": 0.01, "loss": 2.0165, "step": 35454 }, { "epoch": 3.6433415536374847, "grad_norm": 0.04187742993235588, "learning_rate": 0.01, "loss": 2.0146, "step": 35457 }, { "epoch": 3.6436498150431564, "grad_norm": 0.06096494942903519, "learning_rate": 0.01, "loss": 1.9935, "step": 35460 }, { "epoch": 3.6439580764488286, "grad_norm": 0.035430386662483215, "learning_rate": 0.01, "loss": 2.0034, "step": 35463 }, { "epoch": 3.644266337854501, "grad_norm": 0.039337921887636185, "learning_rate": 0.01, "loss": 2.0163, "step": 35466 }, { "epoch": 3.6445745992601726, "grad_norm": 0.04446728155016899, "learning_rate": 0.01, "loss": 1.9804, "step": 35469 }, { "epoch": 3.6448828606658448, "grad_norm": 0.06175538897514343, "learning_rate": 0.01, "loss": 1.9991, "step": 35472 }, { "epoch": 3.6451911220715165, "grad_norm": 0.1305261105298996, "learning_rate": 0.01, "loss": 2.008, "step": 35475 }, { "epoch": 3.6454993834771887, "grad_norm": 0.14111317694187164, "learning_rate": 0.01, "loss": 1.9938, "step": 35478 }, { "epoch": 3.6458076448828605, "grad_norm": 0.13947373628616333, "learning_rate": 0.01, "loss": 2.0083, "step": 35481 }, { "epoch": 3.6461159062885327, "grad_norm": 0.057133182883262634, "learning_rate": 0.01, "loss": 1.9899, "step": 35484 }, { "epoch": 3.646424167694205, "grad_norm": 0.03709038347005844, "learning_rate": 0.01, "loss": 1.9825, "step": 35487 }, { "epoch": 3.6467324290998766, "grad_norm": 0.04786526411771774, "learning_rate": 0.01, "loss": 1.993, "step": 35490 }, { "epoch": 3.647040690505549, "grad_norm": 0.03816988691687584, "learning_rate": 0.01, "loss": 1.9762, "step": 35493 }, { "epoch": 3.6473489519112205, "grad_norm": 0.05153171718120575, "learning_rate": 0.01, "loss": 2.0236, "step": 35496 }, { "epoch": 3.6476572133168927, "grad_norm": 0.09072964638471603, "learning_rate": 0.01, "loss": 2.0153, "step": 35499 }, { "epoch": 3.6479654747225645, "grad_norm": 0.06302040815353394, "learning_rate": 0.01, "loss": 1.9963, "step": 35502 }, { "epoch": 3.6482737361282367, "grad_norm": 0.04940348491072655, "learning_rate": 0.01, "loss": 2.0015, "step": 35505 }, { "epoch": 3.648581997533909, "grad_norm": 0.04952861741185188, "learning_rate": 0.01, "loss": 2.0, "step": 35508 }, { "epoch": 3.6488902589395806, "grad_norm": 0.03227638080716133, "learning_rate": 0.01, "loss": 1.9961, "step": 35511 }, { "epoch": 3.649198520345253, "grad_norm": 0.04237101599574089, "learning_rate": 0.01, "loss": 2.019, "step": 35514 }, { "epoch": 3.649506781750925, "grad_norm": 0.10213712602853775, "learning_rate": 0.01, "loss": 2.0028, "step": 35517 }, { "epoch": 3.6498150431565968, "grad_norm": 0.0747971460223198, "learning_rate": 0.01, "loss": 2.0102, "step": 35520 }, { "epoch": 3.650123304562269, "grad_norm": 0.07060317695140839, "learning_rate": 0.01, "loss": 1.9951, "step": 35523 }, { "epoch": 3.6504315659679407, "grad_norm": 0.13745035231113434, "learning_rate": 0.01, "loss": 2.0171, "step": 35526 }, { "epoch": 3.650739827373613, "grad_norm": 0.11544306576251984, "learning_rate": 0.01, "loss": 2.0374, "step": 35529 }, { "epoch": 3.6510480887792847, "grad_norm": 0.07669739425182343, "learning_rate": 0.01, "loss": 2.0227, "step": 35532 }, { "epoch": 3.651356350184957, "grad_norm": 0.04519101604819298, "learning_rate": 0.01, "loss": 2.0136, "step": 35535 }, { "epoch": 3.651664611590629, "grad_norm": 0.04446371644735336, "learning_rate": 0.01, "loss": 2.0065, "step": 35538 }, { "epoch": 3.651972872996301, "grad_norm": 0.05518916994333267, "learning_rate": 0.01, "loss": 2.0036, "step": 35541 }, { "epoch": 3.652281134401973, "grad_norm": 0.04405786469578743, "learning_rate": 0.01, "loss": 2.0106, "step": 35544 }, { "epoch": 3.6525893958076447, "grad_norm": 0.06154394894838333, "learning_rate": 0.01, "loss": 2.0076, "step": 35547 }, { "epoch": 3.652897657213317, "grad_norm": 0.07919877767562866, "learning_rate": 0.01, "loss": 1.9895, "step": 35550 }, { "epoch": 3.6532059186189887, "grad_norm": 0.0575590617954731, "learning_rate": 0.01, "loss": 2.011, "step": 35553 }, { "epoch": 3.653514180024661, "grad_norm": 0.05680471658706665, "learning_rate": 0.01, "loss": 2.0096, "step": 35556 }, { "epoch": 3.653822441430333, "grad_norm": 0.0352899394929409, "learning_rate": 0.01, "loss": 1.9937, "step": 35559 }, { "epoch": 3.654130702836005, "grad_norm": 0.034432023763656616, "learning_rate": 0.01, "loss": 2.004, "step": 35562 }, { "epoch": 3.654438964241677, "grad_norm": 0.10298652946949005, "learning_rate": 0.01, "loss": 1.9992, "step": 35565 }, { "epoch": 3.654747225647349, "grad_norm": 0.03996056690812111, "learning_rate": 0.01, "loss": 2.007, "step": 35568 }, { "epoch": 3.655055487053021, "grad_norm": 0.07411230355501175, "learning_rate": 0.01, "loss": 1.9981, "step": 35571 }, { "epoch": 3.6553637484586927, "grad_norm": 0.04124278202652931, "learning_rate": 0.01, "loss": 1.9989, "step": 35574 }, { "epoch": 3.655672009864365, "grad_norm": 0.035065338015556335, "learning_rate": 0.01, "loss": 1.9903, "step": 35577 }, { "epoch": 3.655980271270037, "grad_norm": 0.04023493826389313, "learning_rate": 0.01, "loss": 2.0126, "step": 35580 }, { "epoch": 3.656288532675709, "grad_norm": 0.0333552286028862, "learning_rate": 0.01, "loss": 2.0139, "step": 35583 }, { "epoch": 3.656596794081381, "grad_norm": 0.1286098062992096, "learning_rate": 0.01, "loss": 1.9847, "step": 35586 }, { "epoch": 3.6569050554870532, "grad_norm": 0.061940498650074005, "learning_rate": 0.01, "loss": 2.0093, "step": 35589 }, { "epoch": 3.657213316892725, "grad_norm": 0.08766448497772217, "learning_rate": 0.01, "loss": 2.0062, "step": 35592 }, { "epoch": 3.657521578298397, "grad_norm": 0.07218505442142487, "learning_rate": 0.01, "loss": 1.9914, "step": 35595 }, { "epoch": 3.657829839704069, "grad_norm": 0.11700677126646042, "learning_rate": 0.01, "loss": 2.0164, "step": 35598 }, { "epoch": 3.658138101109741, "grad_norm": 0.05746941268444061, "learning_rate": 0.01, "loss": 1.9741, "step": 35601 }, { "epoch": 3.658446362515413, "grad_norm": 0.0517272874712944, "learning_rate": 0.01, "loss": 1.9803, "step": 35604 }, { "epoch": 3.658754623921085, "grad_norm": 0.04524237662553787, "learning_rate": 0.01, "loss": 1.9912, "step": 35607 }, { "epoch": 3.6590628853267573, "grad_norm": 0.053268514573574066, "learning_rate": 0.01, "loss": 2.0099, "step": 35610 }, { "epoch": 3.659371146732429, "grad_norm": 0.06391450762748718, "learning_rate": 0.01, "loss": 1.9958, "step": 35613 }, { "epoch": 3.659679408138101, "grad_norm": 0.06404808908700943, "learning_rate": 0.01, "loss": 2.0093, "step": 35616 }, { "epoch": 3.659987669543773, "grad_norm": 0.06734498590230942, "learning_rate": 0.01, "loss": 2.0135, "step": 35619 }, { "epoch": 3.660295930949445, "grad_norm": 0.056514717638492584, "learning_rate": 0.01, "loss": 2.0195, "step": 35622 }, { "epoch": 3.660604192355117, "grad_norm": 0.04377966374158859, "learning_rate": 0.01, "loss": 1.9945, "step": 35625 }, { "epoch": 3.660912453760789, "grad_norm": 0.0527014285326004, "learning_rate": 0.01, "loss": 2.0011, "step": 35628 }, { "epoch": 3.6612207151664613, "grad_norm": 0.08603314310312271, "learning_rate": 0.01, "loss": 1.9736, "step": 35631 }, { "epoch": 3.661528976572133, "grad_norm": 0.06637904793024063, "learning_rate": 0.01, "loss": 2.0078, "step": 35634 }, { "epoch": 3.6618372379778052, "grad_norm": 0.042211759835481644, "learning_rate": 0.01, "loss": 1.9934, "step": 35637 }, { "epoch": 3.6621454993834774, "grad_norm": 0.08589443564414978, "learning_rate": 0.01, "loss": 2.0076, "step": 35640 }, { "epoch": 3.662453760789149, "grad_norm": 0.12761181592941284, "learning_rate": 0.01, "loss": 1.9956, "step": 35643 }, { "epoch": 3.662762022194821, "grad_norm": 0.08702104538679123, "learning_rate": 0.01, "loss": 2.0147, "step": 35646 }, { "epoch": 3.663070283600493, "grad_norm": 0.1481538712978363, "learning_rate": 0.01, "loss": 2.0028, "step": 35649 }, { "epoch": 3.6633785450061653, "grad_norm": 0.05475825071334839, "learning_rate": 0.01, "loss": 1.991, "step": 35652 }, { "epoch": 3.663686806411837, "grad_norm": 0.0481879860162735, "learning_rate": 0.01, "loss": 2.0244, "step": 35655 }, { "epoch": 3.6639950678175093, "grad_norm": 0.07099757343530655, "learning_rate": 0.01, "loss": 2.027, "step": 35658 }, { "epoch": 3.6643033292231815, "grad_norm": 0.09161633253097534, "learning_rate": 0.01, "loss": 2.0139, "step": 35661 }, { "epoch": 3.664611590628853, "grad_norm": 0.06675172597169876, "learning_rate": 0.01, "loss": 1.9944, "step": 35664 }, { "epoch": 3.6649198520345254, "grad_norm": 0.06166239082813263, "learning_rate": 0.01, "loss": 1.992, "step": 35667 }, { "epoch": 3.665228113440197, "grad_norm": 0.07947093993425369, "learning_rate": 0.01, "loss": 2.0233, "step": 35670 }, { "epoch": 3.6655363748458694, "grad_norm": 0.07639001309871674, "learning_rate": 0.01, "loss": 2.0246, "step": 35673 }, { "epoch": 3.665844636251541, "grad_norm": 0.11109112948179245, "learning_rate": 0.01, "loss": 2.033, "step": 35676 }, { "epoch": 3.6661528976572133, "grad_norm": 0.0517781600356102, "learning_rate": 0.01, "loss": 2.0, "step": 35679 }, { "epoch": 3.6664611590628855, "grad_norm": 0.05615885183215141, "learning_rate": 0.01, "loss": 1.9858, "step": 35682 }, { "epoch": 3.6667694204685573, "grad_norm": 0.054977256804704666, "learning_rate": 0.01, "loss": 1.983, "step": 35685 }, { "epoch": 3.6670776818742294, "grad_norm": 0.08208134025335312, "learning_rate": 0.01, "loss": 2.0038, "step": 35688 }, { "epoch": 3.667385943279901, "grad_norm": 0.053141169250011444, "learning_rate": 0.01, "loss": 2.0, "step": 35691 }, { "epoch": 3.6676942046855734, "grad_norm": 0.0867038443684578, "learning_rate": 0.01, "loss": 1.9992, "step": 35694 }, { "epoch": 3.668002466091245, "grad_norm": 0.04153072461485863, "learning_rate": 0.01, "loss": 2.0156, "step": 35697 }, { "epoch": 3.6683107274969173, "grad_norm": 0.050722386687994, "learning_rate": 0.01, "loss": 2.0029, "step": 35700 }, { "epoch": 3.6686189889025895, "grad_norm": 0.036444198340177536, "learning_rate": 0.01, "loss": 1.98, "step": 35703 }, { "epoch": 3.6689272503082613, "grad_norm": 0.06851452589035034, "learning_rate": 0.01, "loss": 2.0365, "step": 35706 }, { "epoch": 3.6692355117139335, "grad_norm": 0.05442693457007408, "learning_rate": 0.01, "loss": 1.9783, "step": 35709 }, { "epoch": 3.6695437731196057, "grad_norm": 0.04789496958255768, "learning_rate": 0.01, "loss": 1.9919, "step": 35712 }, { "epoch": 3.6698520345252774, "grad_norm": 0.05229181796312332, "learning_rate": 0.01, "loss": 2.0011, "step": 35715 }, { "epoch": 3.670160295930949, "grad_norm": 0.05596926435828209, "learning_rate": 0.01, "loss": 1.9906, "step": 35718 }, { "epoch": 3.6704685573366214, "grad_norm": 0.04481478035449982, "learning_rate": 0.01, "loss": 2.0036, "step": 35721 }, { "epoch": 3.6707768187422936, "grad_norm": 0.053352151066064835, "learning_rate": 0.01, "loss": 1.9952, "step": 35724 }, { "epoch": 3.6710850801479653, "grad_norm": 0.07115049660205841, "learning_rate": 0.01, "loss": 1.9967, "step": 35727 }, { "epoch": 3.6713933415536375, "grad_norm": 0.06887483596801758, "learning_rate": 0.01, "loss": 1.9872, "step": 35730 }, { "epoch": 3.6717016029593097, "grad_norm": 0.14638669788837433, "learning_rate": 0.01, "loss": 2.0211, "step": 35733 }, { "epoch": 3.6720098643649814, "grad_norm": 0.06305021792650223, "learning_rate": 0.01, "loss": 2.0146, "step": 35736 }, { "epoch": 3.6723181257706536, "grad_norm": 0.041157372295856476, "learning_rate": 0.01, "loss": 2.0066, "step": 35739 }, { "epoch": 3.6726263871763254, "grad_norm": 0.10821440070867538, "learning_rate": 0.01, "loss": 1.9949, "step": 35742 }, { "epoch": 3.6729346485819976, "grad_norm": 0.06525052338838577, "learning_rate": 0.01, "loss": 2.0093, "step": 35745 }, { "epoch": 3.6732429099876693, "grad_norm": 0.03519715368747711, "learning_rate": 0.01, "loss": 2.0086, "step": 35748 }, { "epoch": 3.6735511713933415, "grad_norm": 0.0548231340944767, "learning_rate": 0.01, "loss": 1.9969, "step": 35751 }, { "epoch": 3.6738594327990137, "grad_norm": 0.08685827255249023, "learning_rate": 0.01, "loss": 1.9985, "step": 35754 }, { "epoch": 3.6741676942046855, "grad_norm": 0.05920972675085068, "learning_rate": 0.01, "loss": 1.9882, "step": 35757 }, { "epoch": 3.6744759556103577, "grad_norm": 0.060545727610588074, "learning_rate": 0.01, "loss": 2.0057, "step": 35760 }, { "epoch": 3.67478421701603, "grad_norm": 0.05436963587999344, "learning_rate": 0.01, "loss": 2.0023, "step": 35763 }, { "epoch": 3.6750924784217016, "grad_norm": 0.05975078046321869, "learning_rate": 0.01, "loss": 2.0112, "step": 35766 }, { "epoch": 3.6754007398273734, "grad_norm": 0.05793645977973938, "learning_rate": 0.01, "loss": 1.9861, "step": 35769 }, { "epoch": 3.6757090012330456, "grad_norm": 0.07932164520025253, "learning_rate": 0.01, "loss": 1.9808, "step": 35772 }, { "epoch": 3.6760172626387178, "grad_norm": 0.05182117223739624, "learning_rate": 0.01, "loss": 1.9987, "step": 35775 }, { "epoch": 3.6763255240443895, "grad_norm": 0.05408613011240959, "learning_rate": 0.01, "loss": 1.9912, "step": 35778 }, { "epoch": 3.6766337854500617, "grad_norm": 0.0396132618188858, "learning_rate": 0.01, "loss": 1.9922, "step": 35781 }, { "epoch": 3.676942046855734, "grad_norm": 0.04768325388431549, "learning_rate": 0.01, "loss": 2.0055, "step": 35784 }, { "epoch": 3.6772503082614056, "grad_norm": 0.09452294558286667, "learning_rate": 0.01, "loss": 1.9927, "step": 35787 }, { "epoch": 3.677558569667078, "grad_norm": 0.09846623986959457, "learning_rate": 0.01, "loss": 2.0182, "step": 35790 }, { "epoch": 3.6778668310727496, "grad_norm": 0.1176767498254776, "learning_rate": 0.01, "loss": 1.989, "step": 35793 }, { "epoch": 3.678175092478422, "grad_norm": 0.07070811092853546, "learning_rate": 0.01, "loss": 2.0283, "step": 35796 }, { "epoch": 3.6784833538840935, "grad_norm": 0.04359079524874687, "learning_rate": 0.01, "loss": 1.9875, "step": 35799 }, { "epoch": 3.6787916152897657, "grad_norm": 0.05718059092760086, "learning_rate": 0.01, "loss": 2.0186, "step": 35802 }, { "epoch": 3.679099876695438, "grad_norm": 0.03475908935070038, "learning_rate": 0.01, "loss": 1.9996, "step": 35805 }, { "epoch": 3.6794081381011097, "grad_norm": 0.03629058972001076, "learning_rate": 0.01, "loss": 2.0111, "step": 35808 }, { "epoch": 3.679716399506782, "grad_norm": 0.11145780235528946, "learning_rate": 0.01, "loss": 1.989, "step": 35811 }, { "epoch": 3.6800246609124536, "grad_norm": 0.07155486941337585, "learning_rate": 0.01, "loss": 2.025, "step": 35814 }, { "epoch": 3.680332922318126, "grad_norm": 0.05105472728610039, "learning_rate": 0.01, "loss": 2.0109, "step": 35817 }, { "epoch": 3.6806411837237976, "grad_norm": 0.04478003829717636, "learning_rate": 0.01, "loss": 2.0011, "step": 35820 }, { "epoch": 3.6809494451294698, "grad_norm": 0.03695627674460411, "learning_rate": 0.01, "loss": 2.0145, "step": 35823 }, { "epoch": 3.681257706535142, "grad_norm": 0.04222894459962845, "learning_rate": 0.01, "loss": 1.9761, "step": 35826 }, { "epoch": 3.6815659679408137, "grad_norm": 0.04362770542502403, "learning_rate": 0.01, "loss": 2.0082, "step": 35829 }, { "epoch": 3.681874229346486, "grad_norm": 0.07785011827945709, "learning_rate": 0.01, "loss": 1.9881, "step": 35832 }, { "epoch": 3.682182490752158, "grad_norm": 0.08382470905780792, "learning_rate": 0.01, "loss": 1.9778, "step": 35835 }, { "epoch": 3.68249075215783, "grad_norm": 0.09897731244564056, "learning_rate": 0.01, "loss": 1.9845, "step": 35838 }, { "epoch": 3.6827990135635016, "grad_norm": 0.05617908388376236, "learning_rate": 0.01, "loss": 1.977, "step": 35841 }, { "epoch": 3.683107274969174, "grad_norm": 0.07543769478797913, "learning_rate": 0.01, "loss": 1.9984, "step": 35844 }, { "epoch": 3.683415536374846, "grad_norm": 0.04036831855773926, "learning_rate": 0.01, "loss": 1.9944, "step": 35847 }, { "epoch": 3.6837237977805177, "grad_norm": 0.03254294767975807, "learning_rate": 0.01, "loss": 1.9971, "step": 35850 }, { "epoch": 3.68403205918619, "grad_norm": 0.08003180474042892, "learning_rate": 0.01, "loss": 1.9754, "step": 35853 }, { "epoch": 3.684340320591862, "grad_norm": 0.0821150541305542, "learning_rate": 0.01, "loss": 1.9944, "step": 35856 }, { "epoch": 3.684648581997534, "grad_norm": 0.15571752190589905, "learning_rate": 0.01, "loss": 1.997, "step": 35859 }, { "epoch": 3.684956843403206, "grad_norm": 0.07748742401599884, "learning_rate": 0.01, "loss": 2.001, "step": 35862 }, { "epoch": 3.685265104808878, "grad_norm": 0.08278176933526993, "learning_rate": 0.01, "loss": 2.0004, "step": 35865 }, { "epoch": 3.68557336621455, "grad_norm": 0.08014661818742752, "learning_rate": 0.01, "loss": 1.9751, "step": 35868 }, { "epoch": 3.6858816276202218, "grad_norm": 0.03511490300297737, "learning_rate": 0.01, "loss": 2.006, "step": 35871 }, { "epoch": 3.686189889025894, "grad_norm": 0.04790462180972099, "learning_rate": 0.01, "loss": 2.0058, "step": 35874 }, { "epoch": 3.686498150431566, "grad_norm": 0.05917114019393921, "learning_rate": 0.01, "loss": 1.9945, "step": 35877 }, { "epoch": 3.686806411837238, "grad_norm": 0.09026920050382614, "learning_rate": 0.01, "loss": 2.0278, "step": 35880 }, { "epoch": 3.68711467324291, "grad_norm": 0.07264431565999985, "learning_rate": 0.01, "loss": 1.9974, "step": 35883 }, { "epoch": 3.687422934648582, "grad_norm": 0.03798295557498932, "learning_rate": 0.01, "loss": 1.9907, "step": 35886 }, { "epoch": 3.687731196054254, "grad_norm": 0.1147773265838623, "learning_rate": 0.01, "loss": 1.9827, "step": 35889 }, { "epoch": 3.688039457459926, "grad_norm": 0.05281820893287659, "learning_rate": 0.01, "loss": 1.9969, "step": 35892 }, { "epoch": 3.688347718865598, "grad_norm": 0.0617707259953022, "learning_rate": 0.01, "loss": 1.9983, "step": 35895 }, { "epoch": 3.68865598027127, "grad_norm": 0.05084836110472679, "learning_rate": 0.01, "loss": 2.0124, "step": 35898 }, { "epoch": 3.688964241676942, "grad_norm": 0.04952919855713844, "learning_rate": 0.01, "loss": 1.9969, "step": 35901 }, { "epoch": 3.689272503082614, "grad_norm": 0.08321710675954819, "learning_rate": 0.01, "loss": 2.0127, "step": 35904 }, { "epoch": 3.6895807644882863, "grad_norm": 0.04074413329362869, "learning_rate": 0.01, "loss": 2.0056, "step": 35907 }, { "epoch": 3.689889025893958, "grad_norm": 0.11404154449701309, "learning_rate": 0.01, "loss": 2.0102, "step": 35910 }, { "epoch": 3.69019728729963, "grad_norm": 0.07283198833465576, "learning_rate": 0.01, "loss": 2.011, "step": 35913 }, { "epoch": 3.690505548705302, "grad_norm": 0.0645599216222763, "learning_rate": 0.01, "loss": 2.0343, "step": 35916 }, { "epoch": 3.690813810110974, "grad_norm": 0.0537913516163826, "learning_rate": 0.01, "loss": 2.0142, "step": 35919 }, { "epoch": 3.691122071516646, "grad_norm": 0.04984259232878685, "learning_rate": 0.01, "loss": 1.9863, "step": 35922 }, { "epoch": 3.691430332922318, "grad_norm": 0.05349741503596306, "learning_rate": 0.01, "loss": 1.9853, "step": 35925 }, { "epoch": 3.6917385943279903, "grad_norm": 0.05882362276315689, "learning_rate": 0.01, "loss": 2.0053, "step": 35928 }, { "epoch": 3.692046855733662, "grad_norm": 0.0809783861041069, "learning_rate": 0.01, "loss": 1.9967, "step": 35931 }, { "epoch": 3.6923551171393343, "grad_norm": 0.07075916230678558, "learning_rate": 0.01, "loss": 1.9872, "step": 35934 }, { "epoch": 3.692663378545006, "grad_norm": 0.058204181492328644, "learning_rate": 0.01, "loss": 1.9847, "step": 35937 }, { "epoch": 3.6929716399506782, "grad_norm": 0.10149111598730087, "learning_rate": 0.01, "loss": 2.0006, "step": 35940 }, { "epoch": 3.69327990135635, "grad_norm": 0.0722658559679985, "learning_rate": 0.01, "loss": 2.0364, "step": 35943 }, { "epoch": 3.693588162762022, "grad_norm": 0.08981306105852127, "learning_rate": 0.01, "loss": 2.0254, "step": 35946 }, { "epoch": 3.6938964241676944, "grad_norm": 0.04798738285899162, "learning_rate": 0.01, "loss": 2.0099, "step": 35949 }, { "epoch": 3.694204685573366, "grad_norm": 0.0338914729654789, "learning_rate": 0.01, "loss": 1.9843, "step": 35952 }, { "epoch": 3.6945129469790383, "grad_norm": 0.04813714697957039, "learning_rate": 0.01, "loss": 1.9948, "step": 35955 }, { "epoch": 3.6948212083847105, "grad_norm": 0.11086717247962952, "learning_rate": 0.01, "loss": 1.9963, "step": 35958 }, { "epoch": 3.6951294697903823, "grad_norm": 0.09754761308431625, "learning_rate": 0.01, "loss": 1.9965, "step": 35961 }, { "epoch": 3.695437731196054, "grad_norm": 0.07381515204906464, "learning_rate": 0.01, "loss": 1.9921, "step": 35964 }, { "epoch": 3.695745992601726, "grad_norm": 0.0501638762652874, "learning_rate": 0.01, "loss": 2.0231, "step": 35967 }, { "epoch": 3.6960542540073984, "grad_norm": 0.04152151197195053, "learning_rate": 0.01, "loss": 2.0033, "step": 35970 }, { "epoch": 3.69636251541307, "grad_norm": 0.06455028802156448, "learning_rate": 0.01, "loss": 2.0266, "step": 35973 }, { "epoch": 3.6966707768187423, "grad_norm": 0.07925435900688171, "learning_rate": 0.01, "loss": 2.0153, "step": 35976 }, { "epoch": 3.6969790382244145, "grad_norm": 0.17305727303028107, "learning_rate": 0.01, "loss": 1.9923, "step": 35979 }, { "epoch": 3.6972872996300863, "grad_norm": 0.04664710536599159, "learning_rate": 0.01, "loss": 1.9811, "step": 35982 }, { "epoch": 3.6975955610357585, "grad_norm": 0.03971162810921669, "learning_rate": 0.01, "loss": 2.0065, "step": 35985 }, { "epoch": 3.6979038224414302, "grad_norm": 0.04888729378581047, "learning_rate": 0.01, "loss": 1.9901, "step": 35988 }, { "epoch": 3.6982120838471024, "grad_norm": 0.03428930416703224, "learning_rate": 0.01, "loss": 1.9902, "step": 35991 }, { "epoch": 3.698520345252774, "grad_norm": 0.04609334468841553, "learning_rate": 0.01, "loss": 1.9818, "step": 35994 }, { "epoch": 3.6988286066584464, "grad_norm": 0.06719902157783508, "learning_rate": 0.01, "loss": 2.0144, "step": 35997 }, { "epoch": 3.6991368680641186, "grad_norm": 0.0409255288541317, "learning_rate": 0.01, "loss": 1.9893, "step": 36000 }, { "epoch": 3.6994451294697903, "grad_norm": 0.03683490306138992, "learning_rate": 0.01, "loss": 1.9709, "step": 36003 }, { "epoch": 3.6997533908754625, "grad_norm": 0.050057847052812576, "learning_rate": 0.01, "loss": 1.9875, "step": 36006 }, { "epoch": 3.7000616522811343, "grad_norm": 0.06051032990217209, "learning_rate": 0.01, "loss": 2.0034, "step": 36009 }, { "epoch": 3.7003699136868065, "grad_norm": 0.141363725066185, "learning_rate": 0.01, "loss": 1.9931, "step": 36012 }, { "epoch": 3.700678175092478, "grad_norm": 0.04017074033617973, "learning_rate": 0.01, "loss": 1.9919, "step": 36015 }, { "epoch": 3.7009864364981504, "grad_norm": 0.05374070256948471, "learning_rate": 0.01, "loss": 1.9975, "step": 36018 }, { "epoch": 3.7012946979038226, "grad_norm": 0.07037216424942017, "learning_rate": 0.01, "loss": 2.014, "step": 36021 }, { "epoch": 3.7016029593094943, "grad_norm": 0.06400209665298462, "learning_rate": 0.01, "loss": 2.0035, "step": 36024 }, { "epoch": 3.7019112207151665, "grad_norm": 0.04186910763382912, "learning_rate": 0.01, "loss": 1.9881, "step": 36027 }, { "epoch": 3.7022194821208387, "grad_norm": 0.05100405216217041, "learning_rate": 0.01, "loss": 2.0009, "step": 36030 }, { "epoch": 3.7025277435265105, "grad_norm": 0.06786265224218369, "learning_rate": 0.01, "loss": 2.0041, "step": 36033 }, { "epoch": 3.7028360049321822, "grad_norm": 0.058080319315195084, "learning_rate": 0.01, "loss": 1.9748, "step": 36036 }, { "epoch": 3.7031442663378544, "grad_norm": 0.09919019788503647, "learning_rate": 0.01, "loss": 2.0016, "step": 36039 }, { "epoch": 3.7034525277435266, "grad_norm": 0.08252627402544022, "learning_rate": 0.01, "loss": 2.0154, "step": 36042 }, { "epoch": 3.7037607891491984, "grad_norm": 0.06026385724544525, "learning_rate": 0.01, "loss": 1.9877, "step": 36045 }, { "epoch": 3.7040690505548706, "grad_norm": 0.08052903413772583, "learning_rate": 0.01, "loss": 1.9951, "step": 36048 }, { "epoch": 3.7043773119605428, "grad_norm": 0.0790569856762886, "learning_rate": 0.01, "loss": 1.9877, "step": 36051 }, { "epoch": 3.7046855733662145, "grad_norm": 0.04464380070567131, "learning_rate": 0.01, "loss": 2.018, "step": 36054 }, { "epoch": 3.7049938347718867, "grad_norm": 0.06567423790693283, "learning_rate": 0.01, "loss": 1.9886, "step": 36057 }, { "epoch": 3.7053020961775585, "grad_norm": 0.06602831929922104, "learning_rate": 0.01, "loss": 1.9934, "step": 36060 }, { "epoch": 3.7056103575832307, "grad_norm": 0.06838707625865936, "learning_rate": 0.01, "loss": 1.9853, "step": 36063 }, { "epoch": 3.7059186189889024, "grad_norm": 0.09807326644659042, "learning_rate": 0.01, "loss": 1.9919, "step": 36066 }, { "epoch": 3.7062268803945746, "grad_norm": 0.06948495656251907, "learning_rate": 0.01, "loss": 1.992, "step": 36069 }, { "epoch": 3.706535141800247, "grad_norm": 0.07893040776252747, "learning_rate": 0.01, "loss": 1.9804, "step": 36072 }, { "epoch": 3.7068434032059185, "grad_norm": 0.04618433490395546, "learning_rate": 0.01, "loss": 2.0149, "step": 36075 }, { "epoch": 3.7071516646115907, "grad_norm": 0.04784570261836052, "learning_rate": 0.01, "loss": 2.0144, "step": 36078 }, { "epoch": 3.7074599260172625, "grad_norm": 0.03364891558885574, "learning_rate": 0.01, "loss": 1.9914, "step": 36081 }, { "epoch": 3.7077681874229347, "grad_norm": 0.03379710018634796, "learning_rate": 0.01, "loss": 1.994, "step": 36084 }, { "epoch": 3.7080764488286064, "grad_norm": 0.03952077031135559, "learning_rate": 0.01, "loss": 2.0159, "step": 36087 }, { "epoch": 3.7083847102342786, "grad_norm": 0.07781558483839035, "learning_rate": 0.01, "loss": 2.0045, "step": 36090 }, { "epoch": 3.708692971639951, "grad_norm": 0.04583312198519707, "learning_rate": 0.01, "loss": 1.9942, "step": 36093 }, { "epoch": 3.7090012330456226, "grad_norm": 0.08673562854528427, "learning_rate": 0.01, "loss": 1.9915, "step": 36096 }, { "epoch": 3.7093094944512948, "grad_norm": 0.05132952705025673, "learning_rate": 0.01, "loss": 1.9993, "step": 36099 }, { "epoch": 3.709617755856967, "grad_norm": 0.09407106041908264, "learning_rate": 0.01, "loss": 2.0028, "step": 36102 }, { "epoch": 3.7099260172626387, "grad_norm": 0.052429962903261185, "learning_rate": 0.01, "loss": 1.9971, "step": 36105 }, { "epoch": 3.7102342786683105, "grad_norm": 0.10452061146497726, "learning_rate": 0.01, "loss": 1.991, "step": 36108 }, { "epoch": 3.7105425400739827, "grad_norm": 0.03275587409734726, "learning_rate": 0.01, "loss": 1.9941, "step": 36111 }, { "epoch": 3.710850801479655, "grad_norm": 0.10667680203914642, "learning_rate": 0.01, "loss": 1.9775, "step": 36114 }, { "epoch": 3.7111590628853266, "grad_norm": 0.08240865170955658, "learning_rate": 0.01, "loss": 2.0113, "step": 36117 }, { "epoch": 3.711467324290999, "grad_norm": 0.0850924402475357, "learning_rate": 0.01, "loss": 1.9963, "step": 36120 }, { "epoch": 3.711775585696671, "grad_norm": 0.03906584531068802, "learning_rate": 0.01, "loss": 1.9695, "step": 36123 }, { "epoch": 3.7120838471023427, "grad_norm": 0.04797567054629326, "learning_rate": 0.01, "loss": 2.0276, "step": 36126 }, { "epoch": 3.712392108508015, "grad_norm": 0.042494967579841614, "learning_rate": 0.01, "loss": 1.977, "step": 36129 }, { "epoch": 3.7127003699136867, "grad_norm": 0.05491645634174347, "learning_rate": 0.01, "loss": 2.0033, "step": 36132 }, { "epoch": 3.713008631319359, "grad_norm": 0.10729935020208359, "learning_rate": 0.01, "loss": 2.0001, "step": 36135 }, { "epoch": 3.7133168927250306, "grad_norm": 0.043170586228370667, "learning_rate": 0.01, "loss": 2.0007, "step": 36138 }, { "epoch": 3.713625154130703, "grad_norm": 0.08662062138319016, "learning_rate": 0.01, "loss": 2.0094, "step": 36141 }, { "epoch": 3.713933415536375, "grad_norm": 0.11379045993089676, "learning_rate": 0.01, "loss": 1.9953, "step": 36144 }, { "epoch": 3.7142416769420468, "grad_norm": 0.03698570281267166, "learning_rate": 0.01, "loss": 1.9863, "step": 36147 }, { "epoch": 3.714549938347719, "grad_norm": 0.11952532082796097, "learning_rate": 0.01, "loss": 1.9923, "step": 36150 }, { "epoch": 3.7148581997533907, "grad_norm": 0.06563457101583481, "learning_rate": 0.01, "loss": 1.9981, "step": 36153 }, { "epoch": 3.715166461159063, "grad_norm": 0.07150832563638687, "learning_rate": 0.01, "loss": 2.012, "step": 36156 }, { "epoch": 3.7154747225647347, "grad_norm": 0.04657771810889244, "learning_rate": 0.01, "loss": 2.0056, "step": 36159 }, { "epoch": 3.715782983970407, "grad_norm": 0.055245641618967056, "learning_rate": 0.01, "loss": 2.0095, "step": 36162 }, { "epoch": 3.716091245376079, "grad_norm": 0.040487829595804214, "learning_rate": 0.01, "loss": 2.0027, "step": 36165 }, { "epoch": 3.716399506781751, "grad_norm": 0.12351846694946289, "learning_rate": 0.01, "loss": 1.9858, "step": 36168 }, { "epoch": 3.716707768187423, "grad_norm": 0.053484536707401276, "learning_rate": 0.01, "loss": 2.0019, "step": 36171 }, { "epoch": 3.717016029593095, "grad_norm": 0.06141964718699455, "learning_rate": 0.01, "loss": 2.0013, "step": 36174 }, { "epoch": 3.717324290998767, "grad_norm": 0.09509648382663727, "learning_rate": 0.01, "loss": 1.9949, "step": 36177 }, { "epoch": 3.717632552404439, "grad_norm": 0.0969184935092926, "learning_rate": 0.01, "loss": 2.0168, "step": 36180 }, { "epoch": 3.717940813810111, "grad_norm": 0.0884992703795433, "learning_rate": 0.01, "loss": 2.0186, "step": 36183 }, { "epoch": 3.718249075215783, "grad_norm": 0.07382161915302277, "learning_rate": 0.01, "loss": 1.9974, "step": 36186 }, { "epoch": 3.718557336621455, "grad_norm": 0.07776398956775665, "learning_rate": 0.01, "loss": 2.0259, "step": 36189 }, { "epoch": 3.718865598027127, "grad_norm": 0.07049771398305893, "learning_rate": 0.01, "loss": 2.0103, "step": 36192 }, { "epoch": 3.719173859432799, "grad_norm": 0.08669892698526382, "learning_rate": 0.01, "loss": 1.9649, "step": 36195 }, { "epoch": 3.719482120838471, "grad_norm": 0.05756537616252899, "learning_rate": 0.01, "loss": 2.0025, "step": 36198 }, { "epoch": 3.719790382244143, "grad_norm": 0.06954919546842575, "learning_rate": 0.01, "loss": 2.0101, "step": 36201 }, { "epoch": 3.720098643649815, "grad_norm": 0.09297992289066315, "learning_rate": 0.01, "loss": 1.9945, "step": 36204 }, { "epoch": 3.720406905055487, "grad_norm": 0.07941876351833344, "learning_rate": 0.01, "loss": 2.0001, "step": 36207 }, { "epoch": 3.720715166461159, "grad_norm": 0.0698881521821022, "learning_rate": 0.01, "loss": 1.9782, "step": 36210 }, { "epoch": 3.721023427866831, "grad_norm": 0.09939584881067276, "learning_rate": 0.01, "loss": 2.0096, "step": 36213 }, { "epoch": 3.7213316892725032, "grad_norm": 0.059318870306015015, "learning_rate": 0.01, "loss": 1.9965, "step": 36216 }, { "epoch": 3.721639950678175, "grad_norm": 0.06797734647989273, "learning_rate": 0.01, "loss": 1.992, "step": 36219 }, { "epoch": 3.721948212083847, "grad_norm": 0.03932074084877968, "learning_rate": 0.01, "loss": 2.012, "step": 36222 }, { "epoch": 3.7222564734895194, "grad_norm": 0.07458118349313736, "learning_rate": 0.01, "loss": 2.0184, "step": 36225 }, { "epoch": 3.722564734895191, "grad_norm": 0.09383490681648254, "learning_rate": 0.01, "loss": 2.0282, "step": 36228 }, { "epoch": 3.722872996300863, "grad_norm": 0.06527485698461533, "learning_rate": 0.01, "loss": 2.0048, "step": 36231 }, { "epoch": 3.723181257706535, "grad_norm": 0.050437018275260925, "learning_rate": 0.01, "loss": 1.9992, "step": 36234 }, { "epoch": 3.7234895191122073, "grad_norm": 0.04380796477198601, "learning_rate": 0.01, "loss": 2.0181, "step": 36237 }, { "epoch": 3.723797780517879, "grad_norm": 0.04857274517416954, "learning_rate": 0.01, "loss": 2.0172, "step": 36240 }, { "epoch": 3.724106041923551, "grad_norm": 0.032346438616514206, "learning_rate": 0.01, "loss": 1.9889, "step": 36243 }, { "epoch": 3.7244143033292234, "grad_norm": 0.045984454452991486, "learning_rate": 0.01, "loss": 1.9947, "step": 36246 }, { "epoch": 3.724722564734895, "grad_norm": 0.058199312537908554, "learning_rate": 0.01, "loss": 2.0142, "step": 36249 }, { "epoch": 3.7250308261405674, "grad_norm": 0.0391390286386013, "learning_rate": 0.01, "loss": 2.0105, "step": 36252 }, { "epoch": 3.725339087546239, "grad_norm": 0.058478716760873795, "learning_rate": 0.01, "loss": 2.0171, "step": 36255 }, { "epoch": 3.7256473489519113, "grad_norm": 0.10977184772491455, "learning_rate": 0.01, "loss": 1.995, "step": 36258 }, { "epoch": 3.725955610357583, "grad_norm": 0.06433524191379547, "learning_rate": 0.01, "loss": 2.0038, "step": 36261 }, { "epoch": 3.7262638717632552, "grad_norm": 0.07871260493993759, "learning_rate": 0.01, "loss": 1.9805, "step": 36264 }, { "epoch": 3.7265721331689274, "grad_norm": 0.09282384812831879, "learning_rate": 0.01, "loss": 1.991, "step": 36267 }, { "epoch": 3.726880394574599, "grad_norm": 0.03165189549326897, "learning_rate": 0.01, "loss": 1.9864, "step": 36270 }, { "epoch": 3.7271886559802714, "grad_norm": 0.0789475068449974, "learning_rate": 0.01, "loss": 2.0143, "step": 36273 }, { "epoch": 3.727496917385943, "grad_norm": 0.13641710579395294, "learning_rate": 0.01, "loss": 1.996, "step": 36276 }, { "epoch": 3.7278051787916153, "grad_norm": 0.08470667898654938, "learning_rate": 0.01, "loss": 1.9953, "step": 36279 }, { "epoch": 3.728113440197287, "grad_norm": 0.09110618382692337, "learning_rate": 0.01, "loss": 2.005, "step": 36282 }, { "epoch": 3.7284217016029593, "grad_norm": 0.079217828810215, "learning_rate": 0.01, "loss": 1.9694, "step": 36285 }, { "epoch": 3.7287299630086315, "grad_norm": 0.03365809842944145, "learning_rate": 0.01, "loss": 1.998, "step": 36288 }, { "epoch": 3.729038224414303, "grad_norm": 0.0691695362329483, "learning_rate": 0.01, "loss": 1.9971, "step": 36291 }, { "epoch": 3.7293464858199754, "grad_norm": 0.03766563534736633, "learning_rate": 0.01, "loss": 2.0054, "step": 36294 }, { "epoch": 3.7296547472256476, "grad_norm": 0.13410314917564392, "learning_rate": 0.01, "loss": 2.0295, "step": 36297 }, { "epoch": 3.7299630086313194, "grad_norm": 0.08052795380353928, "learning_rate": 0.01, "loss": 1.988, "step": 36300 }, { "epoch": 3.730271270036991, "grad_norm": 0.04205624386668205, "learning_rate": 0.01, "loss": 2.0013, "step": 36303 }, { "epoch": 3.7305795314426633, "grad_norm": 0.055557433515787125, "learning_rate": 0.01, "loss": 2.0097, "step": 36306 }, { "epoch": 3.7308877928483355, "grad_norm": 0.08415018022060394, "learning_rate": 0.01, "loss": 2.0069, "step": 36309 }, { "epoch": 3.7311960542540072, "grad_norm": 0.057384416460990906, "learning_rate": 0.01, "loss": 2.0164, "step": 36312 }, { "epoch": 3.7315043156596794, "grad_norm": 0.046833690255880356, "learning_rate": 0.01, "loss": 1.9747, "step": 36315 }, { "epoch": 3.7318125770653516, "grad_norm": 0.0527094230055809, "learning_rate": 0.01, "loss": 1.9959, "step": 36318 }, { "epoch": 3.7321208384710234, "grad_norm": 0.04863157868385315, "learning_rate": 0.01, "loss": 2.0082, "step": 36321 }, { "epoch": 3.7324290998766956, "grad_norm": 0.05578998848795891, "learning_rate": 0.01, "loss": 1.9724, "step": 36324 }, { "epoch": 3.7327373612823673, "grad_norm": 0.08895915746688843, "learning_rate": 0.01, "loss": 2.0025, "step": 36327 }, { "epoch": 3.7330456226880395, "grad_norm": 0.14340227842330933, "learning_rate": 0.01, "loss": 1.999, "step": 36330 }, { "epoch": 3.7333538840937113, "grad_norm": 0.074753038585186, "learning_rate": 0.01, "loss": 1.9733, "step": 36333 }, { "epoch": 3.7336621454993835, "grad_norm": 0.05069069564342499, "learning_rate": 0.01, "loss": 2.025, "step": 36336 }, { "epoch": 3.7339704069050557, "grad_norm": 0.0417485274374485, "learning_rate": 0.01, "loss": 2.0078, "step": 36339 }, { "epoch": 3.7342786683107274, "grad_norm": 0.04727747291326523, "learning_rate": 0.01, "loss": 2.0165, "step": 36342 }, { "epoch": 3.7345869297163996, "grad_norm": 0.056336645036935806, "learning_rate": 0.01, "loss": 1.9956, "step": 36345 }, { "epoch": 3.7348951911220714, "grad_norm": 0.09889603406190872, "learning_rate": 0.01, "loss": 1.9908, "step": 36348 }, { "epoch": 3.7352034525277436, "grad_norm": 0.06653422117233276, "learning_rate": 0.01, "loss": 1.9675, "step": 36351 }, { "epoch": 3.7355117139334153, "grad_norm": 0.05115421861410141, "learning_rate": 0.01, "loss": 2.0004, "step": 36354 }, { "epoch": 3.7358199753390875, "grad_norm": 0.04250407963991165, "learning_rate": 0.01, "loss": 1.9803, "step": 36357 }, { "epoch": 3.7361282367447597, "grad_norm": 0.037854380905628204, "learning_rate": 0.01, "loss": 2.0014, "step": 36360 }, { "epoch": 3.7364364981504314, "grad_norm": 0.06039261072874069, "learning_rate": 0.01, "loss": 2.0364, "step": 36363 }, { "epoch": 3.7367447595561036, "grad_norm": 0.09946703165769577, "learning_rate": 0.01, "loss": 2.0169, "step": 36366 }, { "epoch": 3.737053020961776, "grad_norm": 0.10038801282644272, "learning_rate": 0.01, "loss": 2.012, "step": 36369 }, { "epoch": 3.7373612823674476, "grad_norm": 0.07845285534858704, "learning_rate": 0.01, "loss": 1.9782, "step": 36372 }, { "epoch": 3.7376695437731193, "grad_norm": 0.08079241961240768, "learning_rate": 0.01, "loss": 1.994, "step": 36375 }, { "epoch": 3.7379778051787915, "grad_norm": 0.04414941743016243, "learning_rate": 0.01, "loss": 2.0103, "step": 36378 }, { "epoch": 3.7382860665844637, "grad_norm": 0.14685457944869995, "learning_rate": 0.01, "loss": 1.9965, "step": 36381 }, { "epoch": 3.7385943279901355, "grad_norm": 0.04633990302681923, "learning_rate": 0.01, "loss": 2.0, "step": 36384 }, { "epoch": 3.7389025893958077, "grad_norm": 0.04103631526231766, "learning_rate": 0.01, "loss": 2.0023, "step": 36387 }, { "epoch": 3.73921085080148, "grad_norm": 0.03546123206615448, "learning_rate": 0.01, "loss": 1.9901, "step": 36390 }, { "epoch": 3.7395191122071516, "grad_norm": 0.08792685717344284, "learning_rate": 0.01, "loss": 1.9955, "step": 36393 }, { "epoch": 3.739827373612824, "grad_norm": 0.09209249913692474, "learning_rate": 0.01, "loss": 2.0211, "step": 36396 }, { "epoch": 3.7401356350184956, "grad_norm": 0.0480416901409626, "learning_rate": 0.01, "loss": 1.9943, "step": 36399 }, { "epoch": 3.7404438964241677, "grad_norm": 0.03383079916238785, "learning_rate": 0.01, "loss": 2.0016, "step": 36402 }, { "epoch": 3.7407521578298395, "grad_norm": 0.06483932584524155, "learning_rate": 0.01, "loss": 2.0077, "step": 36405 }, { "epoch": 3.7410604192355117, "grad_norm": 0.04043160006403923, "learning_rate": 0.01, "loss": 1.994, "step": 36408 }, { "epoch": 3.741368680641184, "grad_norm": 0.047389864921569824, "learning_rate": 0.01, "loss": 1.9923, "step": 36411 }, { "epoch": 3.7416769420468556, "grad_norm": 0.04405448958277702, "learning_rate": 0.01, "loss": 1.9969, "step": 36414 }, { "epoch": 3.741985203452528, "grad_norm": 0.03635013848543167, "learning_rate": 0.01, "loss": 2.0013, "step": 36417 }, { "epoch": 3.7422934648582, "grad_norm": 0.06296937167644501, "learning_rate": 0.01, "loss": 2.0106, "step": 36420 }, { "epoch": 3.7426017262638718, "grad_norm": 0.08138510584831238, "learning_rate": 0.01, "loss": 2.0052, "step": 36423 }, { "epoch": 3.7429099876695435, "grad_norm": 0.05814410001039505, "learning_rate": 0.01, "loss": 2.0081, "step": 36426 }, { "epoch": 3.7432182490752157, "grad_norm": 0.09043169766664505, "learning_rate": 0.01, "loss": 1.9894, "step": 36429 }, { "epoch": 3.743526510480888, "grad_norm": 0.038527343422174454, "learning_rate": 0.01, "loss": 2.0191, "step": 36432 }, { "epoch": 3.7438347718865597, "grad_norm": 0.03342151269316673, "learning_rate": 0.01, "loss": 1.9951, "step": 36435 }, { "epoch": 3.744143033292232, "grad_norm": 0.054572802037000656, "learning_rate": 0.01, "loss": 2.0117, "step": 36438 }, { "epoch": 3.744451294697904, "grad_norm": 0.10896478593349457, "learning_rate": 0.01, "loss": 2.0004, "step": 36441 }, { "epoch": 3.744759556103576, "grad_norm": 0.04996712505817413, "learning_rate": 0.01, "loss": 1.9957, "step": 36444 }, { "epoch": 3.745067817509248, "grad_norm": 0.06720001995563507, "learning_rate": 0.01, "loss": 1.9883, "step": 36447 }, { "epoch": 3.7453760789149197, "grad_norm": 0.07731924951076508, "learning_rate": 0.01, "loss": 2.0132, "step": 36450 }, { "epoch": 3.745684340320592, "grad_norm": 0.07545910775661469, "learning_rate": 0.01, "loss": 2.0, "step": 36453 }, { "epoch": 3.7459926017262637, "grad_norm": 0.07236587256193161, "learning_rate": 0.01, "loss": 1.9954, "step": 36456 }, { "epoch": 3.746300863131936, "grad_norm": 0.05105200409889221, "learning_rate": 0.01, "loss": 1.986, "step": 36459 }, { "epoch": 3.746609124537608, "grad_norm": 0.06345131993293762, "learning_rate": 0.01, "loss": 1.9977, "step": 36462 }, { "epoch": 3.74691738594328, "grad_norm": 0.08864062279462814, "learning_rate": 0.01, "loss": 1.989, "step": 36465 }, { "epoch": 3.747225647348952, "grad_norm": 0.06058958172798157, "learning_rate": 0.01, "loss": 2.0155, "step": 36468 }, { "epoch": 3.7475339087546238, "grad_norm": 0.08054932206869125, "learning_rate": 0.01, "loss": 1.9682, "step": 36471 }, { "epoch": 3.747842170160296, "grad_norm": 0.04538315162062645, "learning_rate": 0.01, "loss": 1.9707, "step": 36474 }, { "epoch": 3.7481504315659677, "grad_norm": 0.11694948375225067, "learning_rate": 0.01, "loss": 1.9962, "step": 36477 }, { "epoch": 3.74845869297164, "grad_norm": 0.037599921226501465, "learning_rate": 0.01, "loss": 1.9998, "step": 36480 }, { "epoch": 3.748766954377312, "grad_norm": 0.062208350747823715, "learning_rate": 0.01, "loss": 2.0154, "step": 36483 }, { "epoch": 3.749075215782984, "grad_norm": 0.04657081514596939, "learning_rate": 0.01, "loss": 1.9954, "step": 36486 }, { "epoch": 3.749383477188656, "grad_norm": 0.11601486057043076, "learning_rate": 0.01, "loss": 2.0216, "step": 36489 }, { "epoch": 3.7496917385943282, "grad_norm": 0.04120590165257454, "learning_rate": 0.01, "loss": 2.0076, "step": 36492 }, { "epoch": 3.75, "grad_norm": 0.07783524692058563, "learning_rate": 0.01, "loss": 1.9892, "step": 36495 }, { "epoch": 3.7503082614056718, "grad_norm": 0.04452613368630409, "learning_rate": 0.01, "loss": 2.0191, "step": 36498 }, { "epoch": 3.750616522811344, "grad_norm": 0.10393857210874557, "learning_rate": 0.01, "loss": 1.9765, "step": 36501 }, { "epoch": 3.750924784217016, "grad_norm": 0.11419197171926498, "learning_rate": 0.01, "loss": 1.9791, "step": 36504 }, { "epoch": 3.751233045622688, "grad_norm": 0.0752970427274704, "learning_rate": 0.01, "loss": 2.0128, "step": 36507 }, { "epoch": 3.75154130702836, "grad_norm": 0.06409469246864319, "learning_rate": 0.01, "loss": 2.0103, "step": 36510 }, { "epoch": 3.7518495684340323, "grad_norm": 0.053437262773513794, "learning_rate": 0.01, "loss": 1.9912, "step": 36513 }, { "epoch": 3.752157829839704, "grad_norm": 0.04102127254009247, "learning_rate": 0.01, "loss": 1.999, "step": 36516 }, { "epoch": 3.7524660912453762, "grad_norm": 0.03595980256795883, "learning_rate": 0.01, "loss": 1.9903, "step": 36519 }, { "epoch": 3.752774352651048, "grad_norm": 0.10266067087650299, "learning_rate": 0.01, "loss": 1.9884, "step": 36522 }, { "epoch": 3.75308261405672, "grad_norm": 0.07302891463041306, "learning_rate": 0.01, "loss": 2.012, "step": 36525 }, { "epoch": 3.753390875462392, "grad_norm": 0.08043140172958374, "learning_rate": 0.01, "loss": 2.0129, "step": 36528 }, { "epoch": 3.753699136868064, "grad_norm": 0.0765608549118042, "learning_rate": 0.01, "loss": 2.0029, "step": 36531 }, { "epoch": 3.7540073982737363, "grad_norm": 0.040770966559648514, "learning_rate": 0.01, "loss": 1.9894, "step": 36534 }, { "epoch": 3.754315659679408, "grad_norm": 0.09125746041536331, "learning_rate": 0.01, "loss": 2.0098, "step": 36537 }, { "epoch": 3.7546239210850803, "grad_norm": 0.10734240710735321, "learning_rate": 0.01, "loss": 1.9839, "step": 36540 }, { "epoch": 3.754932182490752, "grad_norm": 0.05876481533050537, "learning_rate": 0.01, "loss": 1.9759, "step": 36543 }, { "epoch": 3.755240443896424, "grad_norm": 0.04223402962088585, "learning_rate": 0.01, "loss": 1.9709, "step": 36546 }, { "epoch": 3.755548705302096, "grad_norm": 0.03419182077050209, "learning_rate": 0.01, "loss": 1.9934, "step": 36549 }, { "epoch": 3.755856966707768, "grad_norm": 0.08762037754058838, "learning_rate": 0.01, "loss": 2.0068, "step": 36552 }, { "epoch": 3.7561652281134403, "grad_norm": 0.035800088196992874, "learning_rate": 0.01, "loss": 1.9892, "step": 36555 }, { "epoch": 3.756473489519112, "grad_norm": 0.07538183778524399, "learning_rate": 0.01, "loss": 2.0066, "step": 36558 }, { "epoch": 3.7567817509247843, "grad_norm": 0.05067085847258568, "learning_rate": 0.01, "loss": 2.0124, "step": 36561 }, { "epoch": 3.7570900123304565, "grad_norm": 0.07041851431131363, "learning_rate": 0.01, "loss": 1.9941, "step": 36564 }, { "epoch": 3.7573982737361282, "grad_norm": 0.06819632649421692, "learning_rate": 0.01, "loss": 1.9986, "step": 36567 }, { "epoch": 3.7577065351418, "grad_norm": 0.11158425360918045, "learning_rate": 0.01, "loss": 1.9883, "step": 36570 }, { "epoch": 3.758014796547472, "grad_norm": 0.09598013758659363, "learning_rate": 0.01, "loss": 2.0164, "step": 36573 }, { "epoch": 3.7583230579531444, "grad_norm": 0.0501101016998291, "learning_rate": 0.01, "loss": 2.0264, "step": 36576 }, { "epoch": 3.758631319358816, "grad_norm": 0.03867189958691597, "learning_rate": 0.01, "loss": 2.0162, "step": 36579 }, { "epoch": 3.7589395807644883, "grad_norm": 0.03836961090564728, "learning_rate": 0.01, "loss": 2.0095, "step": 36582 }, { "epoch": 3.7592478421701605, "grad_norm": 0.04135824367403984, "learning_rate": 0.01, "loss": 2.0206, "step": 36585 }, { "epoch": 3.7595561035758323, "grad_norm": 0.0922931507229805, "learning_rate": 0.01, "loss": 1.9809, "step": 36588 }, { "epoch": 3.7598643649815044, "grad_norm": 0.048077963292598724, "learning_rate": 0.01, "loss": 1.9716, "step": 36591 }, { "epoch": 3.760172626387176, "grad_norm": 0.04263912886381149, "learning_rate": 0.01, "loss": 2.0082, "step": 36594 }, { "epoch": 3.7604808877928484, "grad_norm": 0.05825957655906677, "learning_rate": 0.01, "loss": 2.0036, "step": 36597 }, { "epoch": 3.76078914919852, "grad_norm": 0.04579320177435875, "learning_rate": 0.01, "loss": 1.9677, "step": 36600 }, { "epoch": 3.7610974106041923, "grad_norm": 0.07970302551984787, "learning_rate": 0.01, "loss": 2.0029, "step": 36603 }, { "epoch": 3.7614056720098645, "grad_norm": 0.1091640293598175, "learning_rate": 0.01, "loss": 1.9974, "step": 36606 }, { "epoch": 3.7617139334155363, "grad_norm": 0.07577066123485565, "learning_rate": 0.01, "loss": 2.0077, "step": 36609 }, { "epoch": 3.7620221948212085, "grad_norm": 0.07901261001825333, "learning_rate": 0.01, "loss": 2.0055, "step": 36612 }, { "epoch": 3.7623304562268807, "grad_norm": 0.040557824075222015, "learning_rate": 0.01, "loss": 2.0129, "step": 36615 }, { "epoch": 3.7626387176325524, "grad_norm": 0.03163420036435127, "learning_rate": 0.01, "loss": 1.9782, "step": 36618 }, { "epoch": 3.762946979038224, "grad_norm": 0.0960499569773674, "learning_rate": 0.01, "loss": 1.9882, "step": 36621 }, { "epoch": 3.7632552404438964, "grad_norm": 0.05283837392926216, "learning_rate": 0.01, "loss": 2.0351, "step": 36624 }, { "epoch": 3.7635635018495686, "grad_norm": 0.03764748573303223, "learning_rate": 0.01, "loss": 1.9943, "step": 36627 }, { "epoch": 3.7638717632552403, "grad_norm": 0.07017308473587036, "learning_rate": 0.01, "loss": 2.0008, "step": 36630 }, { "epoch": 3.7641800246609125, "grad_norm": 0.06506321579217911, "learning_rate": 0.01, "loss": 1.9925, "step": 36633 }, { "epoch": 3.7644882860665847, "grad_norm": 0.040651753544807434, "learning_rate": 0.01, "loss": 1.9799, "step": 36636 }, { "epoch": 3.7647965474722564, "grad_norm": 0.0897100567817688, "learning_rate": 0.01, "loss": 2.014, "step": 36639 }, { "epoch": 3.7651048088779286, "grad_norm": 0.1067187637090683, "learning_rate": 0.01, "loss": 2.0369, "step": 36642 }, { "epoch": 3.7654130702836004, "grad_norm": 0.08220155537128448, "learning_rate": 0.01, "loss": 1.9708, "step": 36645 }, { "epoch": 3.7657213316892726, "grad_norm": 0.0830981582403183, "learning_rate": 0.01, "loss": 1.9937, "step": 36648 }, { "epoch": 3.7660295930949443, "grad_norm": 0.04124632850289345, "learning_rate": 0.01, "loss": 1.9937, "step": 36651 }, { "epoch": 3.7663378545006165, "grad_norm": 0.03570066764950752, "learning_rate": 0.01, "loss": 2.0102, "step": 36654 }, { "epoch": 3.7666461159062887, "grad_norm": 0.04715826362371445, "learning_rate": 0.01, "loss": 1.9917, "step": 36657 }, { "epoch": 3.7669543773119605, "grad_norm": 0.05129298195242882, "learning_rate": 0.01, "loss": 2.0058, "step": 36660 }, { "epoch": 3.7672626387176327, "grad_norm": 0.07690376043319702, "learning_rate": 0.01, "loss": 1.9985, "step": 36663 }, { "epoch": 3.7675709001233044, "grad_norm": 0.06316812336444855, "learning_rate": 0.01, "loss": 1.9911, "step": 36666 }, { "epoch": 3.7678791615289766, "grad_norm": 0.045724622905254364, "learning_rate": 0.01, "loss": 2.007, "step": 36669 }, { "epoch": 3.7681874229346484, "grad_norm": 0.12661319971084595, "learning_rate": 0.01, "loss": 2.0159, "step": 36672 }, { "epoch": 3.7684956843403206, "grad_norm": 0.03898628428578377, "learning_rate": 0.01, "loss": 2.0127, "step": 36675 }, { "epoch": 3.7688039457459928, "grad_norm": 0.04297169670462608, "learning_rate": 0.01, "loss": 2.0072, "step": 36678 }, { "epoch": 3.7691122071516645, "grad_norm": 0.047098271548748016, "learning_rate": 0.01, "loss": 2.0019, "step": 36681 }, { "epoch": 3.7694204685573367, "grad_norm": 0.0880102887749672, "learning_rate": 0.01, "loss": 2.0009, "step": 36684 }, { "epoch": 3.769728729963009, "grad_norm": 0.09598886221647263, "learning_rate": 0.01, "loss": 1.987, "step": 36687 }, { "epoch": 3.7700369913686806, "grad_norm": 0.07920796424150467, "learning_rate": 0.01, "loss": 1.9992, "step": 36690 }, { "epoch": 3.7703452527743524, "grad_norm": 0.09643390029668808, "learning_rate": 0.01, "loss": 2.0219, "step": 36693 }, { "epoch": 3.7706535141800246, "grad_norm": 0.07026758790016174, "learning_rate": 0.01, "loss": 2.0217, "step": 36696 }, { "epoch": 3.770961775585697, "grad_norm": 0.04533863440155983, "learning_rate": 0.01, "loss": 2.0129, "step": 36699 }, { "epoch": 3.7712700369913685, "grad_norm": 0.03511887788772583, "learning_rate": 0.01, "loss": 2.0054, "step": 36702 }, { "epoch": 3.7715782983970407, "grad_norm": 0.03846128657460213, "learning_rate": 0.01, "loss": 2.0006, "step": 36705 }, { "epoch": 3.771886559802713, "grad_norm": 0.07523466646671295, "learning_rate": 0.01, "loss": 2.0232, "step": 36708 }, { "epoch": 3.7721948212083847, "grad_norm": 0.09649614989757538, "learning_rate": 0.01, "loss": 1.9851, "step": 36711 }, { "epoch": 3.772503082614057, "grad_norm": 0.04845314472913742, "learning_rate": 0.01, "loss": 1.9994, "step": 36714 }, { "epoch": 3.7728113440197286, "grad_norm": 0.037477899342775345, "learning_rate": 0.01, "loss": 2.0113, "step": 36717 }, { "epoch": 3.773119605425401, "grad_norm": 0.08308611810207367, "learning_rate": 0.01, "loss": 2.017, "step": 36720 }, { "epoch": 3.7734278668310726, "grad_norm": 0.048357848078012466, "learning_rate": 0.01, "loss": 1.9848, "step": 36723 }, { "epoch": 3.7737361282367448, "grad_norm": 0.04424552246928215, "learning_rate": 0.01, "loss": 2.0176, "step": 36726 }, { "epoch": 3.774044389642417, "grad_norm": 0.04857276752591133, "learning_rate": 0.01, "loss": 2.0304, "step": 36729 }, { "epoch": 3.7743526510480887, "grad_norm": 0.039389774203300476, "learning_rate": 0.01, "loss": 2.0044, "step": 36732 }, { "epoch": 3.774660912453761, "grad_norm": 0.04370071366429329, "learning_rate": 0.01, "loss": 2.0095, "step": 36735 }, { "epoch": 3.7749691738594326, "grad_norm": 0.05456184223294258, "learning_rate": 0.01, "loss": 2.0155, "step": 36738 }, { "epoch": 3.775277435265105, "grad_norm": 0.0888199731707573, "learning_rate": 0.01, "loss": 2.008, "step": 36741 }, { "epoch": 3.7755856966707766, "grad_norm": 0.19867762923240662, "learning_rate": 0.01, "loss": 1.9926, "step": 36744 }, { "epoch": 3.775893958076449, "grad_norm": 0.17659060657024384, "learning_rate": 0.01, "loss": 2.0021, "step": 36747 }, { "epoch": 3.776202219482121, "grad_norm": 0.0629454255104065, "learning_rate": 0.01, "loss": 1.9917, "step": 36750 }, { "epoch": 3.7765104808877927, "grad_norm": 0.05801904946565628, "learning_rate": 0.01, "loss": 1.9972, "step": 36753 }, { "epoch": 3.776818742293465, "grad_norm": 0.04907959699630737, "learning_rate": 0.01, "loss": 1.9899, "step": 36756 }, { "epoch": 3.777127003699137, "grad_norm": 0.08606990426778793, "learning_rate": 0.01, "loss": 1.9829, "step": 36759 }, { "epoch": 3.777435265104809, "grad_norm": 0.03293627128005028, "learning_rate": 0.01, "loss": 2.0177, "step": 36762 }, { "epoch": 3.7777435265104806, "grad_norm": 0.033272456377744675, "learning_rate": 0.01, "loss": 1.982, "step": 36765 }, { "epoch": 3.778051787916153, "grad_norm": 0.10520119965076447, "learning_rate": 0.01, "loss": 1.9592, "step": 36768 }, { "epoch": 3.778360049321825, "grad_norm": 0.03949661925435066, "learning_rate": 0.01, "loss": 2.0025, "step": 36771 }, { "epoch": 3.7786683107274968, "grad_norm": 0.10781146585941315, "learning_rate": 0.01, "loss": 2.0142, "step": 36774 }, { "epoch": 3.778976572133169, "grad_norm": 0.1707312911748886, "learning_rate": 0.01, "loss": 2.0072, "step": 36777 }, { "epoch": 3.779284833538841, "grad_norm": 0.10689792037010193, "learning_rate": 0.01, "loss": 1.9979, "step": 36780 }, { "epoch": 3.779593094944513, "grad_norm": 0.06377099454402924, "learning_rate": 0.01, "loss": 2.0019, "step": 36783 }, { "epoch": 3.779901356350185, "grad_norm": 0.050422243773937225, "learning_rate": 0.01, "loss": 1.9994, "step": 36786 }, { "epoch": 3.780209617755857, "grad_norm": 0.041165612637996674, "learning_rate": 0.01, "loss": 2.0116, "step": 36789 }, { "epoch": 3.780517879161529, "grad_norm": 0.07220548391342163, "learning_rate": 0.01, "loss": 2.0089, "step": 36792 }, { "epoch": 3.780826140567201, "grad_norm": 0.05860710144042969, "learning_rate": 0.01, "loss": 2.0221, "step": 36795 }, { "epoch": 3.781134401972873, "grad_norm": 0.03842824697494507, "learning_rate": 0.01, "loss": 1.9945, "step": 36798 }, { "epoch": 3.781442663378545, "grad_norm": 0.04346943646669388, "learning_rate": 0.01, "loss": 2.0203, "step": 36801 }, { "epoch": 3.781750924784217, "grad_norm": 0.16297590732574463, "learning_rate": 0.01, "loss": 2.0002, "step": 36804 }, { "epoch": 3.782059186189889, "grad_norm": 0.06720095127820969, "learning_rate": 0.01, "loss": 2.0, "step": 36807 }, { "epoch": 3.782367447595561, "grad_norm": 0.04820172116160393, "learning_rate": 0.01, "loss": 1.9773, "step": 36810 }, { "epoch": 3.782675709001233, "grad_norm": 0.0431947223842144, "learning_rate": 0.01, "loss": 2.0213, "step": 36813 }, { "epoch": 3.782983970406905, "grad_norm": 0.0517662838101387, "learning_rate": 0.01, "loss": 2.016, "step": 36816 }, { "epoch": 3.783292231812577, "grad_norm": 0.06249597296118736, "learning_rate": 0.01, "loss": 1.9905, "step": 36819 }, { "epoch": 3.783600493218249, "grad_norm": 0.04001644626259804, "learning_rate": 0.01, "loss": 2.0082, "step": 36822 }, { "epoch": 3.783908754623921, "grad_norm": 0.0713401585817337, "learning_rate": 0.01, "loss": 1.9978, "step": 36825 }, { "epoch": 3.784217016029593, "grad_norm": 0.08583984524011612, "learning_rate": 0.01, "loss": 2.0047, "step": 36828 }, { "epoch": 3.7845252774352653, "grad_norm": 0.05482606962323189, "learning_rate": 0.01, "loss": 2.0038, "step": 36831 }, { "epoch": 3.784833538840937, "grad_norm": 0.04373779147863388, "learning_rate": 0.01, "loss": 2.0003, "step": 36834 }, { "epoch": 3.7851418002466093, "grad_norm": 0.09752769768238068, "learning_rate": 0.01, "loss": 1.9854, "step": 36837 }, { "epoch": 3.785450061652281, "grad_norm": 0.07635717839002609, "learning_rate": 0.01, "loss": 2.0065, "step": 36840 }, { "epoch": 3.7857583230579532, "grad_norm": 0.07021810859441757, "learning_rate": 0.01, "loss": 1.9888, "step": 36843 }, { "epoch": 3.786066584463625, "grad_norm": 0.0541268065571785, "learning_rate": 0.01, "loss": 2.0252, "step": 36846 }, { "epoch": 3.786374845869297, "grad_norm": 0.07205335795879364, "learning_rate": 0.01, "loss": 1.9916, "step": 36849 }, { "epoch": 3.7866831072749694, "grad_norm": 0.057571567595005035, "learning_rate": 0.01, "loss": 1.9944, "step": 36852 }, { "epoch": 3.786991368680641, "grad_norm": 0.11135399341583252, "learning_rate": 0.01, "loss": 2.0053, "step": 36855 }, { "epoch": 3.7872996300863133, "grad_norm": 0.0698394924402237, "learning_rate": 0.01, "loss": 1.9832, "step": 36858 }, { "epoch": 3.787607891491985, "grad_norm": 0.09506375342607498, "learning_rate": 0.01, "loss": 1.9965, "step": 36861 }, { "epoch": 3.7879161528976573, "grad_norm": 0.08534973114728928, "learning_rate": 0.01, "loss": 1.9893, "step": 36864 }, { "epoch": 3.788224414303329, "grad_norm": 0.08360368013381958, "learning_rate": 0.01, "loss": 1.9962, "step": 36867 }, { "epoch": 3.788532675709001, "grad_norm": 0.08635003864765167, "learning_rate": 0.01, "loss": 2.0042, "step": 36870 }, { "epoch": 3.7888409371146734, "grad_norm": 0.033921390771865845, "learning_rate": 0.01, "loss": 2.0134, "step": 36873 }, { "epoch": 3.789149198520345, "grad_norm": 0.04312542825937271, "learning_rate": 0.01, "loss": 2.0188, "step": 36876 }, { "epoch": 3.7894574599260173, "grad_norm": 0.06157702952623367, "learning_rate": 0.01, "loss": 1.9622, "step": 36879 }, { "epoch": 3.7897657213316895, "grad_norm": 0.05039060115814209, "learning_rate": 0.01, "loss": 2.0035, "step": 36882 }, { "epoch": 3.7900739827373613, "grad_norm": 0.05873194709420204, "learning_rate": 0.01, "loss": 2.0335, "step": 36885 }, { "epoch": 3.790382244143033, "grad_norm": 0.07444643974304199, "learning_rate": 0.01, "loss": 1.9708, "step": 36888 }, { "epoch": 3.7906905055487052, "grad_norm": 0.06560081243515015, "learning_rate": 0.01, "loss": 2.0251, "step": 36891 }, { "epoch": 3.7909987669543774, "grad_norm": 0.08456238359212875, "learning_rate": 0.01, "loss": 1.9889, "step": 36894 }, { "epoch": 3.791307028360049, "grad_norm": 0.09048344194889069, "learning_rate": 0.01, "loss": 1.9992, "step": 36897 }, { "epoch": 3.7916152897657214, "grad_norm": 0.07640919834375381, "learning_rate": 0.01, "loss": 2.0104, "step": 36900 }, { "epoch": 3.7919235511713936, "grad_norm": 0.0968911275267601, "learning_rate": 0.01, "loss": 1.9664, "step": 36903 }, { "epoch": 3.7922318125770653, "grad_norm": 0.08743193000555038, "learning_rate": 0.01, "loss": 1.9942, "step": 36906 }, { "epoch": 3.7925400739827375, "grad_norm": 0.056414127349853516, "learning_rate": 0.01, "loss": 1.9847, "step": 36909 }, { "epoch": 3.7928483353884093, "grad_norm": 0.04174700006842613, "learning_rate": 0.01, "loss": 2.0113, "step": 36912 }, { "epoch": 3.7931565967940815, "grad_norm": 0.03753121197223663, "learning_rate": 0.01, "loss": 1.978, "step": 36915 }, { "epoch": 3.793464858199753, "grad_norm": 0.04957683011889458, "learning_rate": 0.01, "loss": 2.0017, "step": 36918 }, { "epoch": 3.7937731196054254, "grad_norm": 0.051355455070734024, "learning_rate": 0.01, "loss": 2.0076, "step": 36921 }, { "epoch": 3.7940813810110976, "grad_norm": 0.0345148891210556, "learning_rate": 0.01, "loss": 1.9869, "step": 36924 }, { "epoch": 3.7943896424167693, "grad_norm": 0.12389633059501648, "learning_rate": 0.01, "loss": 2.0207, "step": 36927 }, { "epoch": 3.7946979038224415, "grad_norm": 0.05504097789525986, "learning_rate": 0.01, "loss": 2.0119, "step": 36930 }, { "epoch": 3.7950061652281133, "grad_norm": 0.044859953224658966, "learning_rate": 0.01, "loss": 2.0009, "step": 36933 }, { "epoch": 3.7953144266337855, "grad_norm": 0.06867794692516327, "learning_rate": 0.01, "loss": 1.9942, "step": 36936 }, { "epoch": 3.7956226880394572, "grad_norm": 0.10308001190423965, "learning_rate": 0.01, "loss": 1.9884, "step": 36939 }, { "epoch": 3.7959309494451294, "grad_norm": 0.07946982234716415, "learning_rate": 0.01, "loss": 1.9817, "step": 36942 }, { "epoch": 3.7962392108508016, "grad_norm": 0.058571770787239075, "learning_rate": 0.01, "loss": 1.9873, "step": 36945 }, { "epoch": 3.7965474722564734, "grad_norm": 0.07778012007474899, "learning_rate": 0.01, "loss": 1.9951, "step": 36948 }, { "epoch": 3.7968557336621456, "grad_norm": 0.06965510547161102, "learning_rate": 0.01, "loss": 1.9944, "step": 36951 }, { "epoch": 3.7971639950678178, "grad_norm": 0.06443614512681961, "learning_rate": 0.01, "loss": 2.012, "step": 36954 }, { "epoch": 3.7974722564734895, "grad_norm": 0.04750034958124161, "learning_rate": 0.01, "loss": 1.9962, "step": 36957 }, { "epoch": 3.7977805178791613, "grad_norm": 0.039454251527786255, "learning_rate": 0.01, "loss": 2.0343, "step": 36960 }, { "epoch": 3.7980887792848335, "grad_norm": 0.07771621644496918, "learning_rate": 0.01, "loss": 2.0194, "step": 36963 }, { "epoch": 3.7983970406905057, "grad_norm": 0.06523919850587845, "learning_rate": 0.01, "loss": 1.9954, "step": 36966 }, { "epoch": 3.7987053020961774, "grad_norm": 0.055610157549381256, "learning_rate": 0.01, "loss": 2.0062, "step": 36969 }, { "epoch": 3.7990135635018496, "grad_norm": 0.08819667249917984, "learning_rate": 0.01, "loss": 1.9859, "step": 36972 }, { "epoch": 3.799321824907522, "grad_norm": 0.05418279394507408, "learning_rate": 0.01, "loss": 1.9951, "step": 36975 }, { "epoch": 3.7996300863131935, "grad_norm": 0.06648479402065277, "learning_rate": 0.01, "loss": 2.0151, "step": 36978 }, { "epoch": 3.7999383477188657, "grad_norm": 0.04629239812493324, "learning_rate": 0.01, "loss": 1.9928, "step": 36981 }, { "epoch": 3.8002466091245375, "grad_norm": 0.042264122515916824, "learning_rate": 0.01, "loss": 1.9832, "step": 36984 }, { "epoch": 3.8005548705302097, "grad_norm": 0.10186997801065445, "learning_rate": 0.01, "loss": 1.9888, "step": 36987 }, { "epoch": 3.8008631319358814, "grad_norm": 0.09278517216444016, "learning_rate": 0.01, "loss": 2.0063, "step": 36990 }, { "epoch": 3.8011713933415536, "grad_norm": 0.07084151357412338, "learning_rate": 0.01, "loss": 1.9972, "step": 36993 }, { "epoch": 3.801479654747226, "grad_norm": 0.07990908622741699, "learning_rate": 0.01, "loss": 2.0019, "step": 36996 }, { "epoch": 3.8017879161528976, "grad_norm": 0.039643771946430206, "learning_rate": 0.01, "loss": 1.9926, "step": 36999 }, { "epoch": 3.8020961775585698, "grad_norm": 0.03678774833679199, "learning_rate": 0.01, "loss": 1.9884, "step": 37002 }, { "epoch": 3.8024044389642415, "grad_norm": 0.040611330419778824, "learning_rate": 0.01, "loss": 2.0054, "step": 37005 }, { "epoch": 3.8027127003699137, "grad_norm": 0.03812083601951599, "learning_rate": 0.01, "loss": 1.9778, "step": 37008 }, { "epoch": 3.8030209617755855, "grad_norm": 0.07582142949104309, "learning_rate": 0.01, "loss": 1.9989, "step": 37011 }, { "epoch": 3.8033292231812577, "grad_norm": 0.14520719647407532, "learning_rate": 0.01, "loss": 1.9948, "step": 37014 }, { "epoch": 3.80363748458693, "grad_norm": 0.04497974365949631, "learning_rate": 0.01, "loss": 2.0239, "step": 37017 }, { "epoch": 3.8039457459926016, "grad_norm": 0.04178651422262192, "learning_rate": 0.01, "loss": 2.0156, "step": 37020 }, { "epoch": 3.804254007398274, "grad_norm": 0.047346848994493484, "learning_rate": 0.01, "loss": 1.9734, "step": 37023 }, { "epoch": 3.804562268803946, "grad_norm": 0.04626936465501785, "learning_rate": 0.01, "loss": 2.0209, "step": 37026 }, { "epoch": 3.8048705302096177, "grad_norm": 0.055330242961645126, "learning_rate": 0.01, "loss": 2.0, "step": 37029 }, { "epoch": 3.8051787916152895, "grad_norm": 0.040033504366874695, "learning_rate": 0.01, "loss": 2.0082, "step": 37032 }, { "epoch": 3.8054870530209617, "grad_norm": 0.03897986188530922, "learning_rate": 0.01, "loss": 1.9863, "step": 37035 }, { "epoch": 3.805795314426634, "grad_norm": 0.07525118440389633, "learning_rate": 0.01, "loss": 2.0316, "step": 37038 }, { "epoch": 3.8061035758323056, "grad_norm": 0.06256567686796188, "learning_rate": 0.01, "loss": 2.0018, "step": 37041 }, { "epoch": 3.806411837237978, "grad_norm": 0.05028248950839043, "learning_rate": 0.01, "loss": 2.0111, "step": 37044 }, { "epoch": 3.80672009864365, "grad_norm": 0.03530338406562805, "learning_rate": 0.01, "loss": 1.9918, "step": 37047 }, { "epoch": 3.8070283600493218, "grad_norm": 0.044229693710803986, "learning_rate": 0.01, "loss": 1.9906, "step": 37050 }, { "epoch": 3.807336621454994, "grad_norm": 0.1117880642414093, "learning_rate": 0.01, "loss": 2.0065, "step": 37053 }, { "epoch": 3.8076448828606657, "grad_norm": 0.04328616335988045, "learning_rate": 0.01, "loss": 1.9818, "step": 37056 }, { "epoch": 3.807953144266338, "grad_norm": 0.05948890000581741, "learning_rate": 0.01, "loss": 2.0013, "step": 37059 }, { "epoch": 3.8082614056720097, "grad_norm": 0.043167680501937866, "learning_rate": 0.01, "loss": 1.9996, "step": 37062 }, { "epoch": 3.808569667077682, "grad_norm": 0.04852902144193649, "learning_rate": 0.01, "loss": 2.0178, "step": 37065 }, { "epoch": 3.808877928483354, "grad_norm": 0.06212899461388588, "learning_rate": 0.01, "loss": 1.9709, "step": 37068 }, { "epoch": 3.809186189889026, "grad_norm": 0.07403160631656647, "learning_rate": 0.01, "loss": 1.9913, "step": 37071 }, { "epoch": 3.809494451294698, "grad_norm": 0.07734464108943939, "learning_rate": 0.01, "loss": 2.008, "step": 37074 }, { "epoch": 3.80980271270037, "grad_norm": 0.05057765170931816, "learning_rate": 0.01, "loss": 1.9949, "step": 37077 }, { "epoch": 3.810110974106042, "grad_norm": 0.11746016144752502, "learning_rate": 0.01, "loss": 1.9787, "step": 37080 }, { "epoch": 3.8104192355117137, "grad_norm": 0.10531330853700638, "learning_rate": 0.01, "loss": 2.0042, "step": 37083 }, { "epoch": 3.810727496917386, "grad_norm": 0.11436725407838821, "learning_rate": 0.01, "loss": 2.0051, "step": 37086 }, { "epoch": 3.811035758323058, "grad_norm": 0.04365989565849304, "learning_rate": 0.01, "loss": 1.9909, "step": 37089 }, { "epoch": 3.81134401972873, "grad_norm": 0.04271963611245155, "learning_rate": 0.01, "loss": 1.998, "step": 37092 }, { "epoch": 3.811652281134402, "grad_norm": 0.03649172931909561, "learning_rate": 0.01, "loss": 1.9963, "step": 37095 }, { "epoch": 3.811960542540074, "grad_norm": 0.054175905883312225, "learning_rate": 0.01, "loss": 2.0175, "step": 37098 }, { "epoch": 3.812268803945746, "grad_norm": 0.041479047387838364, "learning_rate": 0.01, "loss": 1.9962, "step": 37101 }, { "epoch": 3.812577065351418, "grad_norm": 0.03695906326174736, "learning_rate": 0.01, "loss": 1.9984, "step": 37104 }, { "epoch": 3.81288532675709, "grad_norm": 0.05167945846915245, "learning_rate": 0.01, "loss": 2.0219, "step": 37107 }, { "epoch": 3.813193588162762, "grad_norm": 0.0706062912940979, "learning_rate": 0.01, "loss": 1.9862, "step": 37110 }, { "epoch": 3.813501849568434, "grad_norm": 0.04282987117767334, "learning_rate": 0.01, "loss": 1.9829, "step": 37113 }, { "epoch": 3.813810110974106, "grad_norm": 0.048825137317180634, "learning_rate": 0.01, "loss": 1.9764, "step": 37116 }, { "epoch": 3.8141183723797782, "grad_norm": 0.13310399651527405, "learning_rate": 0.01, "loss": 1.9889, "step": 37119 }, { "epoch": 3.81442663378545, "grad_norm": 0.05691874772310257, "learning_rate": 0.01, "loss": 2.0029, "step": 37122 }, { "epoch": 3.814734895191122, "grad_norm": 0.07542920112609863, "learning_rate": 0.01, "loss": 2.0014, "step": 37125 }, { "epoch": 3.815043156596794, "grad_norm": 0.09146812558174133, "learning_rate": 0.01, "loss": 1.9908, "step": 37128 }, { "epoch": 3.815351418002466, "grad_norm": 0.04742105305194855, "learning_rate": 0.01, "loss": 1.9907, "step": 37131 }, { "epoch": 3.815659679408138, "grad_norm": 0.03387041017413139, "learning_rate": 0.01, "loss": 1.9763, "step": 37134 }, { "epoch": 3.81596794081381, "grad_norm": 0.036746036261320114, "learning_rate": 0.01, "loss": 1.9919, "step": 37137 }, { "epoch": 3.8162762022194823, "grad_norm": 0.06593815982341766, "learning_rate": 0.01, "loss": 1.9814, "step": 37140 }, { "epoch": 3.816584463625154, "grad_norm": 0.10538439452648163, "learning_rate": 0.01, "loss": 2.0011, "step": 37143 }, { "epoch": 3.816892725030826, "grad_norm": 0.07849781960248947, "learning_rate": 0.01, "loss": 2.0053, "step": 37146 }, { "epoch": 3.8172009864364984, "grad_norm": 0.10899082571268082, "learning_rate": 0.01, "loss": 2.0186, "step": 37149 }, { "epoch": 3.81750924784217, "grad_norm": 0.05171935632824898, "learning_rate": 0.01, "loss": 1.9911, "step": 37152 }, { "epoch": 3.817817509247842, "grad_norm": 0.06028240546584129, "learning_rate": 0.01, "loss": 1.9911, "step": 37155 }, { "epoch": 3.818125770653514, "grad_norm": 0.0440855398774147, "learning_rate": 0.01, "loss": 2.0063, "step": 37158 }, { "epoch": 3.8184340320591863, "grad_norm": 0.05764901638031006, "learning_rate": 0.01, "loss": 1.9834, "step": 37161 }, { "epoch": 3.818742293464858, "grad_norm": 0.042115770280361176, "learning_rate": 0.01, "loss": 1.9816, "step": 37164 }, { "epoch": 3.8190505548705302, "grad_norm": 0.0819876417517662, "learning_rate": 0.01, "loss": 1.9991, "step": 37167 }, { "epoch": 3.8193588162762024, "grad_norm": 0.09432919323444366, "learning_rate": 0.01, "loss": 1.999, "step": 37170 }, { "epoch": 3.819667077681874, "grad_norm": 0.1223209798336029, "learning_rate": 0.01, "loss": 1.994, "step": 37173 }, { "epoch": 3.8199753390875464, "grad_norm": 0.08615586161613464, "learning_rate": 0.01, "loss": 1.9676, "step": 37176 }, { "epoch": 3.820283600493218, "grad_norm": 0.0847785696387291, "learning_rate": 0.01, "loss": 2.0067, "step": 37179 }, { "epoch": 3.8205918618988903, "grad_norm": 0.059272028505802155, "learning_rate": 0.01, "loss": 1.9938, "step": 37182 }, { "epoch": 3.820900123304562, "grad_norm": 0.042237572371959686, "learning_rate": 0.01, "loss": 2.0011, "step": 37185 }, { "epoch": 3.8212083847102343, "grad_norm": 0.04662346839904785, "learning_rate": 0.01, "loss": 1.989, "step": 37188 }, { "epoch": 3.8215166461159065, "grad_norm": 0.04566279426217079, "learning_rate": 0.01, "loss": 2.0093, "step": 37191 }, { "epoch": 3.821824907521578, "grad_norm": 0.07829894125461578, "learning_rate": 0.01, "loss": 1.9811, "step": 37194 }, { "epoch": 3.8221331689272504, "grad_norm": 0.09386474639177322, "learning_rate": 0.01, "loss": 2.016, "step": 37197 }, { "epoch": 3.822441430332922, "grad_norm": 0.04232482239603996, "learning_rate": 0.01, "loss": 1.9899, "step": 37200 }, { "epoch": 3.8227496917385944, "grad_norm": 0.09978868067264557, "learning_rate": 0.01, "loss": 2.0112, "step": 37203 }, { "epoch": 3.823057953144266, "grad_norm": 0.04246772453188896, "learning_rate": 0.01, "loss": 1.9953, "step": 37206 }, { "epoch": 3.8233662145499383, "grad_norm": 0.13856659829616547, "learning_rate": 0.01, "loss": 2.0061, "step": 37209 }, { "epoch": 3.8236744759556105, "grad_norm": 0.05659123882651329, "learning_rate": 0.01, "loss": 1.9963, "step": 37212 }, { "epoch": 3.8239827373612822, "grad_norm": 0.05944579839706421, "learning_rate": 0.01, "loss": 2.0016, "step": 37215 }, { "epoch": 3.8242909987669544, "grad_norm": 0.040483999997377396, "learning_rate": 0.01, "loss": 1.996, "step": 37218 }, { "epoch": 3.8245992601726266, "grad_norm": 0.05651898682117462, "learning_rate": 0.01, "loss": 1.9863, "step": 37221 }, { "epoch": 3.8249075215782984, "grad_norm": 0.04841217026114464, "learning_rate": 0.01, "loss": 1.9924, "step": 37224 }, { "epoch": 3.82521578298397, "grad_norm": 0.05571025237441063, "learning_rate": 0.01, "loss": 1.9857, "step": 37227 }, { "epoch": 3.8255240443896423, "grad_norm": 0.07893476635217667, "learning_rate": 0.01, "loss": 2.0081, "step": 37230 }, { "epoch": 3.8258323057953145, "grad_norm": 0.07275935262441635, "learning_rate": 0.01, "loss": 2.0264, "step": 37233 }, { "epoch": 3.8261405672009863, "grad_norm": 0.14288173615932465, "learning_rate": 0.01, "loss": 1.9963, "step": 37236 }, { "epoch": 3.8264488286066585, "grad_norm": 0.18501073122024536, "learning_rate": 0.01, "loss": 2.0133, "step": 37239 }, { "epoch": 3.8267570900123307, "grad_norm": 0.12003073841333389, "learning_rate": 0.01, "loss": 1.9811, "step": 37242 }, { "epoch": 3.8270653514180024, "grad_norm": 0.05952821299433708, "learning_rate": 0.01, "loss": 2.0194, "step": 37245 }, { "epoch": 3.8273736128236746, "grad_norm": 0.052555590867996216, "learning_rate": 0.01, "loss": 2.0046, "step": 37248 }, { "epoch": 3.8276818742293464, "grad_norm": 0.05527700483798981, "learning_rate": 0.01, "loss": 1.9929, "step": 37251 }, { "epoch": 3.8279901356350186, "grad_norm": 0.043385427445173264, "learning_rate": 0.01, "loss": 1.9682, "step": 37254 }, { "epoch": 3.8282983970406903, "grad_norm": 0.05360834673047066, "learning_rate": 0.01, "loss": 1.9979, "step": 37257 }, { "epoch": 3.8286066584463625, "grad_norm": 0.04542825371026993, "learning_rate": 0.01, "loss": 1.981, "step": 37260 }, { "epoch": 3.8289149198520347, "grad_norm": 0.04607833921909332, "learning_rate": 0.01, "loss": 1.9995, "step": 37263 }, { "epoch": 3.8292231812577064, "grad_norm": 0.059813372790813446, "learning_rate": 0.01, "loss": 2.0043, "step": 37266 }, { "epoch": 3.8295314426633786, "grad_norm": 0.11444689333438873, "learning_rate": 0.01, "loss": 2.0026, "step": 37269 }, { "epoch": 3.829839704069051, "grad_norm": 0.07510776817798615, "learning_rate": 0.01, "loss": 2.0019, "step": 37272 }, { "epoch": 3.8301479654747226, "grad_norm": 0.051781926304101944, "learning_rate": 0.01, "loss": 2.0194, "step": 37275 }, { "epoch": 3.8304562268803943, "grad_norm": 0.051285672932863235, "learning_rate": 0.01, "loss": 1.9982, "step": 37278 }, { "epoch": 3.8307644882860665, "grad_norm": 0.0464506596326828, "learning_rate": 0.01, "loss": 2.0033, "step": 37281 }, { "epoch": 3.8310727496917387, "grad_norm": 0.03997113183140755, "learning_rate": 0.01, "loss": 1.9985, "step": 37284 }, { "epoch": 3.8313810110974105, "grad_norm": 0.03474081680178642, "learning_rate": 0.01, "loss": 1.9885, "step": 37287 }, { "epoch": 3.8316892725030827, "grad_norm": 0.13113395869731903, "learning_rate": 0.01, "loss": 2.0031, "step": 37290 }, { "epoch": 3.831997533908755, "grad_norm": 0.03894534707069397, "learning_rate": 0.01, "loss": 2.0232, "step": 37293 }, { "epoch": 3.8323057953144266, "grad_norm": 0.030656691640615463, "learning_rate": 0.01, "loss": 2.0052, "step": 37296 }, { "epoch": 3.832614056720099, "grad_norm": 0.08632262796163559, "learning_rate": 0.01, "loss": 2.0037, "step": 37299 }, { "epoch": 3.8329223181257706, "grad_norm": 0.09762530028820038, "learning_rate": 0.01, "loss": 1.997, "step": 37302 }, { "epoch": 3.8332305795314427, "grad_norm": 0.07323916256427765, "learning_rate": 0.01, "loss": 2.0051, "step": 37305 }, { "epoch": 3.8335388409371145, "grad_norm": 0.08117776364088058, "learning_rate": 0.01, "loss": 2.0123, "step": 37308 }, { "epoch": 3.8338471023427867, "grad_norm": 0.07396585494279861, "learning_rate": 0.01, "loss": 2.0043, "step": 37311 }, { "epoch": 3.834155363748459, "grad_norm": 0.07192173600196838, "learning_rate": 0.01, "loss": 2.0249, "step": 37314 }, { "epoch": 3.8344636251541306, "grad_norm": 0.06731805950403214, "learning_rate": 0.01, "loss": 2.0005, "step": 37317 }, { "epoch": 3.834771886559803, "grad_norm": 0.06851818412542343, "learning_rate": 0.01, "loss": 1.9924, "step": 37320 }, { "epoch": 3.8350801479654746, "grad_norm": 0.07433861494064331, "learning_rate": 0.01, "loss": 2.0238, "step": 37323 }, { "epoch": 3.835388409371147, "grad_norm": 0.13405515253543854, "learning_rate": 0.01, "loss": 2.0068, "step": 37326 }, { "epoch": 3.8356966707768185, "grad_norm": 0.0912710577249527, "learning_rate": 0.01, "loss": 1.9802, "step": 37329 }, { "epoch": 3.8360049321824907, "grad_norm": 0.05791820213198662, "learning_rate": 0.01, "loss": 1.9655, "step": 37332 }, { "epoch": 3.836313193588163, "grad_norm": 0.06457755714654922, "learning_rate": 0.01, "loss": 1.9818, "step": 37335 }, { "epoch": 3.8366214549938347, "grad_norm": 0.06486820429563522, "learning_rate": 0.01, "loss": 1.9909, "step": 37338 }, { "epoch": 3.836929716399507, "grad_norm": 0.04546729475259781, "learning_rate": 0.01, "loss": 1.9898, "step": 37341 }, { "epoch": 3.837237977805179, "grad_norm": 0.058978717774152756, "learning_rate": 0.01, "loss": 1.9948, "step": 37344 }, { "epoch": 3.837546239210851, "grad_norm": 0.09566085040569305, "learning_rate": 0.01, "loss": 2.0054, "step": 37347 }, { "epoch": 3.8378545006165226, "grad_norm": 0.038889624178409576, "learning_rate": 0.01, "loss": 2.0, "step": 37350 }, { "epoch": 3.8381627620221948, "grad_norm": 0.09589283168315887, "learning_rate": 0.01, "loss": 2.0305, "step": 37353 }, { "epoch": 3.838471023427867, "grad_norm": 0.04719104990363121, "learning_rate": 0.01, "loss": 2.0008, "step": 37356 }, { "epoch": 3.8387792848335387, "grad_norm": 0.09488753974437714, "learning_rate": 0.01, "loss": 1.9929, "step": 37359 }, { "epoch": 3.839087546239211, "grad_norm": 0.09091513603925705, "learning_rate": 0.01, "loss": 2.0032, "step": 37362 }, { "epoch": 3.839395807644883, "grad_norm": 0.06685637682676315, "learning_rate": 0.01, "loss": 1.9713, "step": 37365 }, { "epoch": 3.839704069050555, "grad_norm": 0.0734042376279831, "learning_rate": 0.01, "loss": 2.0043, "step": 37368 }, { "epoch": 3.840012330456227, "grad_norm": 0.04996969550848007, "learning_rate": 0.01, "loss": 1.9931, "step": 37371 }, { "epoch": 3.840320591861899, "grad_norm": 0.07901585847139359, "learning_rate": 0.01, "loss": 2.0007, "step": 37374 }, { "epoch": 3.840628853267571, "grad_norm": 0.06289222836494446, "learning_rate": 0.01, "loss": 1.978, "step": 37377 }, { "epoch": 3.8409371146732427, "grad_norm": 0.07179414480924606, "learning_rate": 0.01, "loss": 1.988, "step": 37380 }, { "epoch": 3.841245376078915, "grad_norm": 0.05835287645459175, "learning_rate": 0.01, "loss": 1.9777, "step": 37383 }, { "epoch": 3.841553637484587, "grad_norm": 0.07789372652769089, "learning_rate": 0.01, "loss": 1.9832, "step": 37386 }, { "epoch": 3.841861898890259, "grad_norm": 0.10866318643093109, "learning_rate": 0.01, "loss": 1.9973, "step": 37389 }, { "epoch": 3.842170160295931, "grad_norm": 0.12253516167402267, "learning_rate": 0.01, "loss": 1.9902, "step": 37392 }, { "epoch": 3.842478421701603, "grad_norm": 0.1773093193769455, "learning_rate": 0.01, "loss": 2.0107, "step": 37395 }, { "epoch": 3.842786683107275, "grad_norm": 0.12998591363430023, "learning_rate": 0.01, "loss": 1.9986, "step": 37398 }, { "epoch": 3.8430949445129468, "grad_norm": 0.07139603793621063, "learning_rate": 0.01, "loss": 2.0066, "step": 37401 }, { "epoch": 3.843403205918619, "grad_norm": 0.05447724089026451, "learning_rate": 0.01, "loss": 1.9924, "step": 37404 }, { "epoch": 3.843711467324291, "grad_norm": 0.053450725972652435, "learning_rate": 0.01, "loss": 2.0279, "step": 37407 }, { "epoch": 3.844019728729963, "grad_norm": 0.042635634541511536, "learning_rate": 0.01, "loss": 2.0146, "step": 37410 }, { "epoch": 3.844327990135635, "grad_norm": 0.06024963781237602, "learning_rate": 0.01, "loss": 1.9926, "step": 37413 }, { "epoch": 3.8446362515413073, "grad_norm": 0.04149017482995987, "learning_rate": 0.01, "loss": 1.9986, "step": 37416 }, { "epoch": 3.844944512946979, "grad_norm": 0.043965183198451996, "learning_rate": 0.01, "loss": 1.9881, "step": 37419 }, { "epoch": 3.845252774352651, "grad_norm": 0.06676291674375534, "learning_rate": 0.01, "loss": 1.9913, "step": 37422 }, { "epoch": 3.845561035758323, "grad_norm": 0.1391642689704895, "learning_rate": 0.01, "loss": 1.9918, "step": 37425 }, { "epoch": 3.845869297163995, "grad_norm": 0.12824542820453644, "learning_rate": 0.01, "loss": 2.0091, "step": 37428 }, { "epoch": 3.846177558569667, "grad_norm": 0.038818515837192535, "learning_rate": 0.01, "loss": 2.0008, "step": 37431 }, { "epoch": 3.846485819975339, "grad_norm": 0.06105605140328407, "learning_rate": 0.01, "loss": 2.005, "step": 37434 }, { "epoch": 3.8467940813810113, "grad_norm": 0.05692486837506294, "learning_rate": 0.01, "loss": 2.0234, "step": 37437 }, { "epoch": 3.847102342786683, "grad_norm": 0.03328600525856018, "learning_rate": 0.01, "loss": 1.9845, "step": 37440 }, { "epoch": 3.8474106041923553, "grad_norm": 0.05563362315297127, "learning_rate": 0.01, "loss": 1.9972, "step": 37443 }, { "epoch": 3.847718865598027, "grad_norm": 0.04523022845387459, "learning_rate": 0.01, "loss": 1.9607, "step": 37446 }, { "epoch": 3.848027127003699, "grad_norm": 0.12618042528629303, "learning_rate": 0.01, "loss": 1.9578, "step": 37449 }, { "epoch": 3.848335388409371, "grad_norm": 0.10030481964349747, "learning_rate": 0.01, "loss": 2.002, "step": 37452 }, { "epoch": 3.848643649815043, "grad_norm": 0.09524470567703247, "learning_rate": 0.01, "loss": 1.9772, "step": 37455 }, { "epoch": 3.8489519112207153, "grad_norm": 0.06369511038064957, "learning_rate": 0.01, "loss": 1.9853, "step": 37458 }, { "epoch": 3.849260172626387, "grad_norm": 0.052571844309568405, "learning_rate": 0.01, "loss": 1.9925, "step": 37461 }, { "epoch": 3.8495684340320593, "grad_norm": 0.040871210396289825, "learning_rate": 0.01, "loss": 2.0192, "step": 37464 }, { "epoch": 3.849876695437731, "grad_norm": 0.0393734946846962, "learning_rate": 0.01, "loss": 2.0312, "step": 37467 }, { "epoch": 3.8501849568434032, "grad_norm": 0.10289833694696426, "learning_rate": 0.01, "loss": 1.9791, "step": 37470 }, { "epoch": 3.850493218249075, "grad_norm": 0.053732771426439285, "learning_rate": 0.01, "loss": 1.9955, "step": 37473 }, { "epoch": 3.850801479654747, "grad_norm": 0.035672031342983246, "learning_rate": 0.01, "loss": 1.9896, "step": 37476 }, { "epoch": 3.8511097410604194, "grad_norm": 0.07263844460248947, "learning_rate": 0.01, "loss": 1.9779, "step": 37479 }, { "epoch": 3.851418002466091, "grad_norm": 0.06151323765516281, "learning_rate": 0.01, "loss": 1.9965, "step": 37482 }, { "epoch": 3.8517262638717633, "grad_norm": 0.08033137768507004, "learning_rate": 0.01, "loss": 2.0161, "step": 37485 }, { "epoch": 3.8520345252774355, "grad_norm": 0.09900759160518646, "learning_rate": 0.01, "loss": 2.0116, "step": 37488 }, { "epoch": 3.8523427866831073, "grad_norm": 0.061891715973615646, "learning_rate": 0.01, "loss": 1.9932, "step": 37491 }, { "epoch": 3.8526510480887795, "grad_norm": 0.039978478103876114, "learning_rate": 0.01, "loss": 1.998, "step": 37494 }, { "epoch": 3.852959309494451, "grad_norm": 0.04366837814450264, "learning_rate": 0.01, "loss": 1.9992, "step": 37497 }, { "epoch": 3.8532675709001234, "grad_norm": 0.03704690560698509, "learning_rate": 0.01, "loss": 2.0011, "step": 37500 }, { "epoch": 3.853575832305795, "grad_norm": 0.03964201733469963, "learning_rate": 0.01, "loss": 1.9773, "step": 37503 }, { "epoch": 3.8538840937114673, "grad_norm": 0.060866422951221466, "learning_rate": 0.01, "loss": 1.9779, "step": 37506 }, { "epoch": 3.8541923551171395, "grad_norm": 0.1373164802789688, "learning_rate": 0.01, "loss": 1.9751, "step": 37509 }, { "epoch": 3.8545006165228113, "grad_norm": 0.12396371364593506, "learning_rate": 0.01, "loss": 2.0007, "step": 37512 }, { "epoch": 3.8548088779284835, "grad_norm": 0.05732661485671997, "learning_rate": 0.01, "loss": 1.9889, "step": 37515 }, { "epoch": 3.8551171393341552, "grad_norm": 0.072254478931427, "learning_rate": 0.01, "loss": 2.0066, "step": 37518 }, { "epoch": 3.8554254007398274, "grad_norm": 0.05103025957942009, "learning_rate": 0.01, "loss": 1.981, "step": 37521 }, { "epoch": 3.855733662145499, "grad_norm": 0.06759181618690491, "learning_rate": 0.01, "loss": 1.9863, "step": 37524 }, { "epoch": 3.8560419235511714, "grad_norm": 0.08190746605396271, "learning_rate": 0.01, "loss": 2.0103, "step": 37527 }, { "epoch": 3.8563501849568436, "grad_norm": 0.052758537232875824, "learning_rate": 0.01, "loss": 2.0052, "step": 37530 }, { "epoch": 3.8566584463625153, "grad_norm": 0.06971180438995361, "learning_rate": 0.01, "loss": 1.9825, "step": 37533 }, { "epoch": 3.8569667077681875, "grad_norm": 0.051832135766744614, "learning_rate": 0.01, "loss": 2.0283, "step": 37536 }, { "epoch": 3.8572749691738597, "grad_norm": 0.11838405579328537, "learning_rate": 0.01, "loss": 1.9869, "step": 37539 }, { "epoch": 3.8575832305795315, "grad_norm": 0.0657329186797142, "learning_rate": 0.01, "loss": 2.0137, "step": 37542 }, { "epoch": 3.857891491985203, "grad_norm": 0.042532552033662796, "learning_rate": 0.01, "loss": 1.9633, "step": 37545 }, { "epoch": 3.8581997533908754, "grad_norm": 0.029605695977807045, "learning_rate": 0.01, "loss": 1.9747, "step": 37548 }, { "epoch": 3.8585080147965476, "grad_norm": 0.045208025723695755, "learning_rate": 0.01, "loss": 2.0034, "step": 37551 }, { "epoch": 3.8588162762022193, "grad_norm": 0.061939138919115067, "learning_rate": 0.01, "loss": 2.0042, "step": 37554 }, { "epoch": 3.8591245376078915, "grad_norm": 0.08816343545913696, "learning_rate": 0.01, "loss": 1.9995, "step": 37557 }, { "epoch": 3.8594327990135637, "grad_norm": 0.06432296335697174, "learning_rate": 0.01, "loss": 1.9992, "step": 37560 }, { "epoch": 3.8597410604192355, "grad_norm": 0.07691509276628494, "learning_rate": 0.01, "loss": 1.984, "step": 37563 }, { "epoch": 3.8600493218249077, "grad_norm": 0.06328088790178299, "learning_rate": 0.01, "loss": 2.0085, "step": 37566 }, { "epoch": 3.8603575832305794, "grad_norm": 0.10897444188594818, "learning_rate": 0.01, "loss": 2.0061, "step": 37569 }, { "epoch": 3.8606658446362516, "grad_norm": 0.07461336255073547, "learning_rate": 0.01, "loss": 1.9987, "step": 37572 }, { "epoch": 3.8609741060419234, "grad_norm": 0.08543375879526138, "learning_rate": 0.01, "loss": 1.9953, "step": 37575 }, { "epoch": 3.8612823674475956, "grad_norm": 0.09546118974685669, "learning_rate": 0.01, "loss": 1.9984, "step": 37578 }, { "epoch": 3.8615906288532678, "grad_norm": 0.0741833969950676, "learning_rate": 0.01, "loss": 2.0174, "step": 37581 }, { "epoch": 3.8618988902589395, "grad_norm": 0.06818868219852448, "learning_rate": 0.01, "loss": 1.9662, "step": 37584 }, { "epoch": 3.8622071516646117, "grad_norm": 0.05515025556087494, "learning_rate": 0.01, "loss": 2.0154, "step": 37587 }, { "epoch": 3.8625154130702835, "grad_norm": 0.035819582641124725, "learning_rate": 0.01, "loss": 2.0044, "step": 37590 }, { "epoch": 3.8628236744759556, "grad_norm": 0.03145124390721321, "learning_rate": 0.01, "loss": 1.9895, "step": 37593 }, { "epoch": 3.8631319358816274, "grad_norm": 0.036893073469400406, "learning_rate": 0.01, "loss": 1.971, "step": 37596 }, { "epoch": 3.8634401972872996, "grad_norm": 0.03417917340993881, "learning_rate": 0.01, "loss": 2.0172, "step": 37599 }, { "epoch": 3.863748458692972, "grad_norm": 0.06035599485039711, "learning_rate": 0.01, "loss": 1.9913, "step": 37602 }, { "epoch": 3.8640567200986435, "grad_norm": 0.12924471497535706, "learning_rate": 0.01, "loss": 1.9937, "step": 37605 }, { "epoch": 3.8643649815043157, "grad_norm": 0.1035354882478714, "learning_rate": 0.01, "loss": 1.9955, "step": 37608 }, { "epoch": 3.864673242909988, "grad_norm": 0.08066008985042572, "learning_rate": 0.01, "loss": 1.9858, "step": 37611 }, { "epoch": 3.8649815043156597, "grad_norm": 0.09418363124132156, "learning_rate": 0.01, "loss": 1.9869, "step": 37614 }, { "epoch": 3.8652897657213314, "grad_norm": 0.04805073142051697, "learning_rate": 0.01, "loss": 2.0007, "step": 37617 }, { "epoch": 3.8655980271270036, "grad_norm": 0.04753410443663597, "learning_rate": 0.01, "loss": 1.9875, "step": 37620 }, { "epoch": 3.865906288532676, "grad_norm": 0.0458386205136776, "learning_rate": 0.01, "loss": 1.9936, "step": 37623 }, { "epoch": 3.8662145499383476, "grad_norm": 0.1318642795085907, "learning_rate": 0.01, "loss": 1.9759, "step": 37626 }, { "epoch": 3.8665228113440198, "grad_norm": 0.10072299838066101, "learning_rate": 0.01, "loss": 2.0074, "step": 37629 }, { "epoch": 3.866831072749692, "grad_norm": 0.06701991707086563, "learning_rate": 0.01, "loss": 2.0128, "step": 37632 }, { "epoch": 3.8671393341553637, "grad_norm": 0.055372897535562515, "learning_rate": 0.01, "loss": 2.0143, "step": 37635 }, { "epoch": 3.867447595561036, "grad_norm": 0.061906322836875916, "learning_rate": 0.01, "loss": 1.9947, "step": 37638 }, { "epoch": 3.8677558569667077, "grad_norm": 0.05329981446266174, "learning_rate": 0.01, "loss": 2.0001, "step": 37641 }, { "epoch": 3.86806411837238, "grad_norm": 0.05068975314497948, "learning_rate": 0.01, "loss": 1.9685, "step": 37644 }, { "epoch": 3.8683723797780516, "grad_norm": 0.09255755692720413, "learning_rate": 0.01, "loss": 2.0059, "step": 37647 }, { "epoch": 3.868680641183724, "grad_norm": 0.08617328852415085, "learning_rate": 0.01, "loss": 1.9905, "step": 37650 }, { "epoch": 3.868988902589396, "grad_norm": 0.044413745403289795, "learning_rate": 0.01, "loss": 1.978, "step": 37653 }, { "epoch": 3.8692971639950677, "grad_norm": 0.086729995906353, "learning_rate": 0.01, "loss": 1.9997, "step": 37656 }, { "epoch": 3.86960542540074, "grad_norm": 0.07304412871599197, "learning_rate": 0.01, "loss": 1.9925, "step": 37659 }, { "epoch": 3.8699136868064117, "grad_norm": 0.06820013374090195, "learning_rate": 0.01, "loss": 2.0073, "step": 37662 }, { "epoch": 3.870221948212084, "grad_norm": 0.06025752052664757, "learning_rate": 0.01, "loss": 1.9999, "step": 37665 }, { "epoch": 3.8705302096177556, "grad_norm": 0.0649590715765953, "learning_rate": 0.01, "loss": 1.996, "step": 37668 }, { "epoch": 3.870838471023428, "grad_norm": 0.03553159162402153, "learning_rate": 0.01, "loss": 1.9689, "step": 37671 }, { "epoch": 3.8711467324291, "grad_norm": 0.06017104908823967, "learning_rate": 0.01, "loss": 2.0013, "step": 37674 }, { "epoch": 3.8714549938347718, "grad_norm": 0.05896780639886856, "learning_rate": 0.01, "loss": 1.9801, "step": 37677 }, { "epoch": 3.871763255240444, "grad_norm": 0.03493823856115341, "learning_rate": 0.01, "loss": 2.0032, "step": 37680 }, { "epoch": 3.872071516646116, "grad_norm": 0.11454503238201141, "learning_rate": 0.01, "loss": 2.0004, "step": 37683 }, { "epoch": 3.872379778051788, "grad_norm": 0.03603978455066681, "learning_rate": 0.01, "loss": 1.9834, "step": 37686 }, { "epoch": 3.8726880394574597, "grad_norm": 0.03927883505821228, "learning_rate": 0.01, "loss": 2.0015, "step": 37689 }, { "epoch": 3.872996300863132, "grad_norm": 0.06592239439487457, "learning_rate": 0.01, "loss": 2.0025, "step": 37692 }, { "epoch": 3.873304562268804, "grad_norm": 0.04402392357587814, "learning_rate": 0.01, "loss": 1.9845, "step": 37695 }, { "epoch": 3.873612823674476, "grad_norm": 0.04484686255455017, "learning_rate": 0.01, "loss": 1.9756, "step": 37698 }, { "epoch": 3.873921085080148, "grad_norm": 0.03577352687716484, "learning_rate": 0.01, "loss": 1.9957, "step": 37701 }, { "epoch": 3.87422934648582, "grad_norm": 0.03952937200665474, "learning_rate": 0.01, "loss": 1.9727, "step": 37704 }, { "epoch": 3.874537607891492, "grad_norm": 0.048469673842191696, "learning_rate": 0.01, "loss": 1.9783, "step": 37707 }, { "epoch": 3.874845869297164, "grad_norm": 0.04531377553939819, "learning_rate": 0.01, "loss": 1.9878, "step": 37710 }, { "epoch": 3.875154130702836, "grad_norm": 0.11962947994470596, "learning_rate": 0.01, "loss": 2.013, "step": 37713 }, { "epoch": 3.875462392108508, "grad_norm": 0.03379445523023605, "learning_rate": 0.01, "loss": 1.9817, "step": 37716 }, { "epoch": 3.87577065351418, "grad_norm": 0.036722879856824875, "learning_rate": 0.01, "loss": 1.969, "step": 37719 }, { "epoch": 3.876078914919852, "grad_norm": 0.03215174376964569, "learning_rate": 0.01, "loss": 1.9704, "step": 37722 }, { "epoch": 3.876387176325524, "grad_norm": 0.04591568931937218, "learning_rate": 0.01, "loss": 1.9886, "step": 37725 }, { "epoch": 3.876695437731196, "grad_norm": 0.04655212163925171, "learning_rate": 0.01, "loss": 1.9971, "step": 37728 }, { "epoch": 3.877003699136868, "grad_norm": 0.053555794060230255, "learning_rate": 0.01, "loss": 1.9863, "step": 37731 }, { "epoch": 3.8773119605425403, "grad_norm": 0.05492212250828743, "learning_rate": 0.01, "loss": 2.0099, "step": 37734 }, { "epoch": 3.877620221948212, "grad_norm": 0.07766193896532059, "learning_rate": 0.01, "loss": 1.982, "step": 37737 }, { "epoch": 3.877928483353884, "grad_norm": 0.040087949484586716, "learning_rate": 0.01, "loss": 2.018, "step": 37740 }, { "epoch": 3.878236744759556, "grad_norm": 0.07583244144916534, "learning_rate": 0.01, "loss": 1.9694, "step": 37743 }, { "epoch": 3.8785450061652282, "grad_norm": 0.0815076008439064, "learning_rate": 0.01, "loss": 1.9871, "step": 37746 }, { "epoch": 3.8788532675709, "grad_norm": 0.09940320998430252, "learning_rate": 0.01, "loss": 2.0211, "step": 37749 }, { "epoch": 3.879161528976572, "grad_norm": 0.07148890197277069, "learning_rate": 0.01, "loss": 2.0068, "step": 37752 }, { "epoch": 3.8794697903822444, "grad_norm": 0.0479053258895874, "learning_rate": 0.01, "loss": 2.0073, "step": 37755 }, { "epoch": 3.879778051787916, "grad_norm": 0.0394553542137146, "learning_rate": 0.01, "loss": 1.9948, "step": 37758 }, { "epoch": 3.8800863131935883, "grad_norm": 0.04193533584475517, "learning_rate": 0.01, "loss": 1.992, "step": 37761 }, { "epoch": 3.88039457459926, "grad_norm": 0.09164579212665558, "learning_rate": 0.01, "loss": 1.9968, "step": 37764 }, { "epoch": 3.8807028360049323, "grad_norm": 0.06852073222398758, "learning_rate": 0.01, "loss": 2.0127, "step": 37767 }, { "epoch": 3.881011097410604, "grad_norm": 0.049102578312158585, "learning_rate": 0.01, "loss": 2.0228, "step": 37770 }, { "epoch": 3.881319358816276, "grad_norm": 0.12974314391613007, "learning_rate": 0.01, "loss": 2.0158, "step": 37773 }, { "epoch": 3.8816276202219484, "grad_norm": 0.07722889631986618, "learning_rate": 0.01, "loss": 1.9791, "step": 37776 }, { "epoch": 3.88193588162762, "grad_norm": 0.07344157248735428, "learning_rate": 0.01, "loss": 2.0069, "step": 37779 }, { "epoch": 3.8822441430332923, "grad_norm": 0.035780053585767746, "learning_rate": 0.01, "loss": 2.0301, "step": 37782 }, { "epoch": 3.882552404438964, "grad_norm": 0.04180228337645531, "learning_rate": 0.01, "loss": 2.0013, "step": 37785 }, { "epoch": 3.8828606658446363, "grad_norm": 0.06659300625324249, "learning_rate": 0.01, "loss": 2.0016, "step": 37788 }, { "epoch": 3.883168927250308, "grad_norm": 0.08275042474269867, "learning_rate": 0.01, "loss": 1.9989, "step": 37791 }, { "epoch": 3.8834771886559802, "grad_norm": 0.12271406501531601, "learning_rate": 0.01, "loss": 2.0088, "step": 37794 }, { "epoch": 3.8837854500616524, "grad_norm": 0.14413659274578094, "learning_rate": 0.01, "loss": 2.0153, "step": 37797 }, { "epoch": 3.884093711467324, "grad_norm": 0.0712043046951294, "learning_rate": 0.01, "loss": 1.9928, "step": 37800 }, { "epoch": 3.8844019728729964, "grad_norm": 0.06497600674629211, "learning_rate": 0.01, "loss": 2.018, "step": 37803 }, { "epoch": 3.8847102342786686, "grad_norm": 0.053064875304698944, "learning_rate": 0.01, "loss": 1.9804, "step": 37806 }, { "epoch": 3.8850184956843403, "grad_norm": 0.04865000769495964, "learning_rate": 0.01, "loss": 2.0012, "step": 37809 }, { "epoch": 3.885326757090012, "grad_norm": 0.03862098976969719, "learning_rate": 0.01, "loss": 1.9836, "step": 37812 }, { "epoch": 3.8856350184956843, "grad_norm": 0.07821226119995117, "learning_rate": 0.01, "loss": 1.9974, "step": 37815 }, { "epoch": 3.8859432799013565, "grad_norm": 0.04477280378341675, "learning_rate": 0.01, "loss": 1.9914, "step": 37818 }, { "epoch": 3.886251541307028, "grad_norm": 0.11052905023097992, "learning_rate": 0.01, "loss": 1.9593, "step": 37821 }, { "epoch": 3.8865598027127004, "grad_norm": 0.07528085261583328, "learning_rate": 0.01, "loss": 1.9931, "step": 37824 }, { "epoch": 3.8868680641183726, "grad_norm": 0.09329832345247269, "learning_rate": 0.01, "loss": 1.9903, "step": 37827 }, { "epoch": 3.8871763255240444, "grad_norm": 0.03858843818306923, "learning_rate": 0.01, "loss": 2.0163, "step": 37830 }, { "epoch": 3.8874845869297165, "grad_norm": 0.05364726856350899, "learning_rate": 0.01, "loss": 1.9904, "step": 37833 }, { "epoch": 3.8877928483353883, "grad_norm": 0.036494240164756775, "learning_rate": 0.01, "loss": 1.9889, "step": 37836 }, { "epoch": 3.8881011097410605, "grad_norm": 0.033809784799814224, "learning_rate": 0.01, "loss": 1.9991, "step": 37839 }, { "epoch": 3.8884093711467322, "grad_norm": 0.08358091861009598, "learning_rate": 0.01, "loss": 1.9997, "step": 37842 }, { "epoch": 3.8887176325524044, "grad_norm": 0.04314170405268669, "learning_rate": 0.01, "loss": 1.9829, "step": 37845 }, { "epoch": 3.8890258939580766, "grad_norm": 0.06448940187692642, "learning_rate": 0.01, "loss": 2.0054, "step": 37848 }, { "epoch": 3.8893341553637484, "grad_norm": 0.04236144572496414, "learning_rate": 0.01, "loss": 2.0074, "step": 37851 }, { "epoch": 3.8896424167694206, "grad_norm": 0.08682555705308914, "learning_rate": 0.01, "loss": 1.9907, "step": 37854 }, { "epoch": 3.8899506781750923, "grad_norm": 0.1599910706281662, "learning_rate": 0.01, "loss": 2.0193, "step": 37857 }, { "epoch": 3.8902589395807645, "grad_norm": 0.04853571951389313, "learning_rate": 0.01, "loss": 2.013, "step": 37860 }, { "epoch": 3.8905672009864363, "grad_norm": 0.054008036851882935, "learning_rate": 0.01, "loss": 2.0035, "step": 37863 }, { "epoch": 3.8908754623921085, "grad_norm": 0.0459190271794796, "learning_rate": 0.01, "loss": 2.0101, "step": 37866 }, { "epoch": 3.8911837237977807, "grad_norm": 0.047940943390131, "learning_rate": 0.01, "loss": 1.9976, "step": 37869 }, { "epoch": 3.8914919852034524, "grad_norm": 0.041892241686582565, "learning_rate": 0.01, "loss": 1.9977, "step": 37872 }, { "epoch": 3.8918002466091246, "grad_norm": 0.037861257791519165, "learning_rate": 0.01, "loss": 2.0089, "step": 37875 }, { "epoch": 3.892108508014797, "grad_norm": 0.1314014196395874, "learning_rate": 0.01, "loss": 2.0124, "step": 37878 }, { "epoch": 3.8924167694204685, "grad_norm": 0.053526077419519424, "learning_rate": 0.01, "loss": 1.9998, "step": 37881 }, { "epoch": 3.8927250308261403, "grad_norm": 0.044471751898527145, "learning_rate": 0.01, "loss": 1.9975, "step": 37884 }, { "epoch": 3.8930332922318125, "grad_norm": 0.10089153051376343, "learning_rate": 0.01, "loss": 2.005, "step": 37887 }, { "epoch": 3.8933415536374847, "grad_norm": 0.09028996527194977, "learning_rate": 0.01, "loss": 2.0231, "step": 37890 }, { "epoch": 3.8936498150431564, "grad_norm": 0.06257162988185883, "learning_rate": 0.01, "loss": 2.0181, "step": 37893 }, { "epoch": 3.8939580764488286, "grad_norm": 0.056669414043426514, "learning_rate": 0.01, "loss": 2.0005, "step": 37896 }, { "epoch": 3.894266337854501, "grad_norm": 0.09135915338993073, "learning_rate": 0.01, "loss": 2.0008, "step": 37899 }, { "epoch": 3.8945745992601726, "grad_norm": 0.055741071701049805, "learning_rate": 0.01, "loss": 1.9884, "step": 37902 }, { "epoch": 3.8948828606658448, "grad_norm": 0.06624908000230789, "learning_rate": 0.01, "loss": 2.0107, "step": 37905 }, { "epoch": 3.8951911220715165, "grad_norm": 0.030004529282450676, "learning_rate": 0.01, "loss": 2.0076, "step": 37908 }, { "epoch": 3.8954993834771887, "grad_norm": 0.04271325841546059, "learning_rate": 0.01, "loss": 1.9966, "step": 37911 }, { "epoch": 3.8958076448828605, "grad_norm": 0.04701056331396103, "learning_rate": 0.01, "loss": 1.9912, "step": 37914 }, { "epoch": 3.8961159062885327, "grad_norm": 0.04298432916402817, "learning_rate": 0.01, "loss": 1.9828, "step": 37917 }, { "epoch": 3.896424167694205, "grad_norm": 0.04561863839626312, "learning_rate": 0.01, "loss": 1.9627, "step": 37920 }, { "epoch": 3.8967324290998766, "grad_norm": 0.12965039908885956, "learning_rate": 0.01, "loss": 2.0225, "step": 37923 }, { "epoch": 3.897040690505549, "grad_norm": 0.1109326183795929, "learning_rate": 0.01, "loss": 2.0033, "step": 37926 }, { "epoch": 3.8973489519112205, "grad_norm": 0.06224660202860832, "learning_rate": 0.01, "loss": 2.0008, "step": 37929 }, { "epoch": 3.8976572133168927, "grad_norm": 0.06318973749876022, "learning_rate": 0.01, "loss": 2.0092, "step": 37932 }, { "epoch": 3.8979654747225645, "grad_norm": 0.06799352914094925, "learning_rate": 0.01, "loss": 2.0034, "step": 37935 }, { "epoch": 3.8982737361282367, "grad_norm": 0.07071252167224884, "learning_rate": 0.01, "loss": 2.0239, "step": 37938 }, { "epoch": 3.898581997533909, "grad_norm": 0.03879907727241516, "learning_rate": 0.01, "loss": 1.9725, "step": 37941 }, { "epoch": 3.8988902589395806, "grad_norm": 0.048606619238853455, "learning_rate": 0.01, "loss": 2.0106, "step": 37944 }, { "epoch": 3.899198520345253, "grad_norm": 0.09814203530550003, "learning_rate": 0.01, "loss": 1.9919, "step": 37947 }, { "epoch": 3.899506781750925, "grad_norm": 0.04557656869292259, "learning_rate": 0.01, "loss": 1.98, "step": 37950 }, { "epoch": 3.8998150431565968, "grad_norm": 0.04122527688741684, "learning_rate": 0.01, "loss": 2.0131, "step": 37953 }, { "epoch": 3.900123304562269, "grad_norm": 0.060118671506643295, "learning_rate": 0.01, "loss": 2.0066, "step": 37956 }, { "epoch": 3.9004315659679407, "grad_norm": 0.14660358428955078, "learning_rate": 0.01, "loss": 1.9894, "step": 37959 }, { "epoch": 3.900739827373613, "grad_norm": 0.09819690883159637, "learning_rate": 0.01, "loss": 1.9912, "step": 37962 }, { "epoch": 3.9010480887792847, "grad_norm": 0.041503068059682846, "learning_rate": 0.01, "loss": 1.9565, "step": 37965 }, { "epoch": 3.901356350184957, "grad_norm": 0.04021459445357323, "learning_rate": 0.01, "loss": 2.0216, "step": 37968 }, { "epoch": 3.901664611590629, "grad_norm": 0.04789562150835991, "learning_rate": 0.01, "loss": 2.021, "step": 37971 }, { "epoch": 3.901972872996301, "grad_norm": 0.03949809446930885, "learning_rate": 0.01, "loss": 1.9655, "step": 37974 }, { "epoch": 3.902281134401973, "grad_norm": 0.041829485446214676, "learning_rate": 0.01, "loss": 2.0131, "step": 37977 }, { "epoch": 3.9025893958076447, "grad_norm": 0.13577204942703247, "learning_rate": 0.01, "loss": 1.9844, "step": 37980 }, { "epoch": 3.902897657213317, "grad_norm": 0.03821146488189697, "learning_rate": 0.01, "loss": 2.0024, "step": 37983 }, { "epoch": 3.9032059186189887, "grad_norm": 0.07382892072200775, "learning_rate": 0.01, "loss": 2.0065, "step": 37986 }, { "epoch": 3.903514180024661, "grad_norm": 0.04287475720047951, "learning_rate": 0.01, "loss": 1.9884, "step": 37989 }, { "epoch": 3.903822441430333, "grad_norm": 0.08588143438100815, "learning_rate": 0.01, "loss": 2.0178, "step": 37992 }, { "epoch": 3.904130702836005, "grad_norm": 0.060327284038066864, "learning_rate": 0.01, "loss": 1.9785, "step": 37995 }, { "epoch": 3.904438964241677, "grad_norm": 0.050726667046546936, "learning_rate": 0.01, "loss": 1.9798, "step": 37998 }, { "epoch": 3.904747225647349, "grad_norm": 0.0876336544752121, "learning_rate": 0.01, "loss": 1.9977, "step": 38001 }, { "epoch": 3.905055487053021, "grad_norm": 0.05690234154462814, "learning_rate": 0.01, "loss": 1.9835, "step": 38004 }, { "epoch": 3.9053637484586927, "grad_norm": 0.07977151870727539, "learning_rate": 0.01, "loss": 2.0067, "step": 38007 }, { "epoch": 3.905672009864365, "grad_norm": 0.038005661219358444, "learning_rate": 0.01, "loss": 2.0113, "step": 38010 }, { "epoch": 3.905980271270037, "grad_norm": 0.1033119261264801, "learning_rate": 0.01, "loss": 1.9964, "step": 38013 }, { "epoch": 3.906288532675709, "grad_norm": 0.05024554207921028, "learning_rate": 0.01, "loss": 1.9703, "step": 38016 }, { "epoch": 3.906596794081381, "grad_norm": 0.07625278830528259, "learning_rate": 0.01, "loss": 1.9895, "step": 38019 }, { "epoch": 3.9069050554870532, "grad_norm": 0.10337291657924652, "learning_rate": 0.01, "loss": 2.0167, "step": 38022 }, { "epoch": 3.907213316892725, "grad_norm": 0.04415413364768028, "learning_rate": 0.01, "loss": 2.0055, "step": 38025 }, { "epoch": 3.907521578298397, "grad_norm": 0.0347541943192482, "learning_rate": 0.01, "loss": 2.0018, "step": 38028 }, { "epoch": 3.907829839704069, "grad_norm": 0.036209288984537125, "learning_rate": 0.01, "loss": 1.9647, "step": 38031 }, { "epoch": 3.908138101109741, "grad_norm": 0.0959850400686264, "learning_rate": 0.01, "loss": 2.0013, "step": 38034 }, { "epoch": 3.908446362515413, "grad_norm": 0.05654274299740791, "learning_rate": 0.01, "loss": 1.983, "step": 38037 }, { "epoch": 3.908754623921085, "grad_norm": 0.11333990842103958, "learning_rate": 0.01, "loss": 1.9921, "step": 38040 }, { "epoch": 3.9090628853267573, "grad_norm": 0.05329067260026932, "learning_rate": 0.01, "loss": 2.0203, "step": 38043 }, { "epoch": 3.909371146732429, "grad_norm": 0.03489474579691887, "learning_rate": 0.01, "loss": 1.9992, "step": 38046 }, { "epoch": 3.909679408138101, "grad_norm": 0.044765155762434006, "learning_rate": 0.01, "loss": 1.9994, "step": 38049 }, { "epoch": 3.909987669543773, "grad_norm": 0.11689729988574982, "learning_rate": 0.01, "loss": 2.0322, "step": 38052 }, { "epoch": 3.910295930949445, "grad_norm": 0.04785189777612686, "learning_rate": 0.01, "loss": 2.0103, "step": 38055 }, { "epoch": 3.910604192355117, "grad_norm": 0.040157750248909, "learning_rate": 0.01, "loss": 1.9836, "step": 38058 }, { "epoch": 3.910912453760789, "grad_norm": 0.04107039049267769, "learning_rate": 0.01, "loss": 2.0106, "step": 38061 }, { "epoch": 3.9112207151664613, "grad_norm": 0.1298598051071167, "learning_rate": 0.01, "loss": 1.9866, "step": 38064 }, { "epoch": 3.911528976572133, "grad_norm": 0.04832058027386665, "learning_rate": 0.01, "loss": 1.9831, "step": 38067 }, { "epoch": 3.9118372379778052, "grad_norm": 0.035566527396440506, "learning_rate": 0.01, "loss": 2.0097, "step": 38070 }, { "epoch": 3.9121454993834774, "grad_norm": 0.12645745277404785, "learning_rate": 0.01, "loss": 1.9978, "step": 38073 }, { "epoch": 3.912453760789149, "grad_norm": 0.13044588267803192, "learning_rate": 0.01, "loss": 1.9921, "step": 38076 }, { "epoch": 3.912762022194821, "grad_norm": 0.0751858800649643, "learning_rate": 0.01, "loss": 2.024, "step": 38079 }, { "epoch": 3.913070283600493, "grad_norm": 0.06417164206504822, "learning_rate": 0.01, "loss": 2.0241, "step": 38082 }, { "epoch": 3.9133785450061653, "grad_norm": 0.03895511105656624, "learning_rate": 0.01, "loss": 2.0074, "step": 38085 }, { "epoch": 3.913686806411837, "grad_norm": 0.03581606224179268, "learning_rate": 0.01, "loss": 1.9923, "step": 38088 }, { "epoch": 3.9139950678175093, "grad_norm": 0.02936650812625885, "learning_rate": 0.01, "loss": 1.989, "step": 38091 }, { "epoch": 3.9143033292231815, "grad_norm": 0.036661721765995026, "learning_rate": 0.01, "loss": 2.009, "step": 38094 }, { "epoch": 3.914611590628853, "grad_norm": 0.08295129984617233, "learning_rate": 0.01, "loss": 2.0141, "step": 38097 }, { "epoch": 3.9149198520345254, "grad_norm": 0.09887045621871948, "learning_rate": 0.01, "loss": 2.0028, "step": 38100 }, { "epoch": 3.915228113440197, "grad_norm": 0.0821511521935463, "learning_rate": 0.01, "loss": 1.9886, "step": 38103 }, { "epoch": 3.9155363748458694, "grad_norm": 0.0894683301448822, "learning_rate": 0.01, "loss": 2.0059, "step": 38106 }, { "epoch": 3.915844636251541, "grad_norm": 0.043715961277484894, "learning_rate": 0.01, "loss": 2.0197, "step": 38109 }, { "epoch": 3.9161528976572133, "grad_norm": 0.06341227144002914, "learning_rate": 0.01, "loss": 2.004, "step": 38112 }, { "epoch": 3.9164611590628855, "grad_norm": 0.10574996471405029, "learning_rate": 0.01, "loss": 1.9834, "step": 38115 }, { "epoch": 3.9167694204685573, "grad_norm": 0.07099676132202148, "learning_rate": 0.01, "loss": 1.9827, "step": 38118 }, { "epoch": 3.9170776818742294, "grad_norm": 0.051802802830934525, "learning_rate": 0.01, "loss": 2.0185, "step": 38121 }, { "epoch": 3.917385943279901, "grad_norm": 0.059143371880054474, "learning_rate": 0.01, "loss": 2.0009, "step": 38124 }, { "epoch": 3.9176942046855734, "grad_norm": 0.07478468120098114, "learning_rate": 0.01, "loss": 1.99, "step": 38127 }, { "epoch": 3.918002466091245, "grad_norm": 0.05269934609532356, "learning_rate": 0.01, "loss": 2.002, "step": 38130 }, { "epoch": 3.9183107274969173, "grad_norm": 0.03774020075798035, "learning_rate": 0.01, "loss": 1.9863, "step": 38133 }, { "epoch": 3.9186189889025895, "grad_norm": 0.06620101630687714, "learning_rate": 0.01, "loss": 1.9894, "step": 38136 }, { "epoch": 3.9189272503082613, "grad_norm": 0.13302361965179443, "learning_rate": 0.01, "loss": 1.9997, "step": 38139 }, { "epoch": 3.9192355117139335, "grad_norm": 0.05435695871710777, "learning_rate": 0.01, "loss": 1.9911, "step": 38142 }, { "epoch": 3.9195437731196057, "grad_norm": 0.041471030563116074, "learning_rate": 0.01, "loss": 1.9936, "step": 38145 }, { "epoch": 3.9198520345252774, "grad_norm": 0.032130394130945206, "learning_rate": 0.01, "loss": 1.9983, "step": 38148 }, { "epoch": 3.920160295930949, "grad_norm": 0.032780006527900696, "learning_rate": 0.01, "loss": 1.9729, "step": 38151 }, { "epoch": 3.9204685573366214, "grad_norm": 0.03699677437543869, "learning_rate": 0.01, "loss": 2.0168, "step": 38154 }, { "epoch": 3.9207768187422936, "grad_norm": 0.04473430663347244, "learning_rate": 0.01, "loss": 2.0035, "step": 38157 }, { "epoch": 3.9210850801479653, "grad_norm": 0.06131662428379059, "learning_rate": 0.01, "loss": 2.0053, "step": 38160 }, { "epoch": 3.9213933415536375, "grad_norm": 0.10085607320070267, "learning_rate": 0.01, "loss": 1.9963, "step": 38163 }, { "epoch": 3.9217016029593097, "grad_norm": 0.11734067648649216, "learning_rate": 0.01, "loss": 1.9955, "step": 38166 }, { "epoch": 3.9220098643649814, "grad_norm": 0.09287893027067184, "learning_rate": 0.01, "loss": 1.9914, "step": 38169 }, { "epoch": 3.9223181257706536, "grad_norm": 0.058617495000362396, "learning_rate": 0.01, "loss": 1.9977, "step": 38172 }, { "epoch": 3.9226263871763254, "grad_norm": 0.0597362145781517, "learning_rate": 0.01, "loss": 1.9811, "step": 38175 }, { "epoch": 3.9229346485819976, "grad_norm": 0.046750448644161224, "learning_rate": 0.01, "loss": 1.9766, "step": 38178 }, { "epoch": 3.9232429099876693, "grad_norm": 0.03863513097167015, "learning_rate": 0.01, "loss": 2.0081, "step": 38181 }, { "epoch": 3.9235511713933415, "grad_norm": 0.03276417404413223, "learning_rate": 0.01, "loss": 2.0101, "step": 38184 }, { "epoch": 3.9238594327990137, "grad_norm": 0.03270519897341728, "learning_rate": 0.01, "loss": 1.9997, "step": 38187 }, { "epoch": 3.9241676942046855, "grad_norm": 0.05820414423942566, "learning_rate": 0.01, "loss": 2.0085, "step": 38190 }, { "epoch": 3.9244759556103577, "grad_norm": 0.0589318610727787, "learning_rate": 0.01, "loss": 2.015, "step": 38193 }, { "epoch": 3.92478421701603, "grad_norm": 0.05013816058635712, "learning_rate": 0.01, "loss": 1.9994, "step": 38196 }, { "epoch": 3.9250924784217016, "grad_norm": 0.10934565216302872, "learning_rate": 0.01, "loss": 2.0149, "step": 38199 }, { "epoch": 3.9254007398273734, "grad_norm": 0.08114335685968399, "learning_rate": 0.01, "loss": 1.9721, "step": 38202 }, { "epoch": 3.9257090012330456, "grad_norm": 0.056287068873643875, "learning_rate": 0.01, "loss": 1.9753, "step": 38205 }, { "epoch": 3.9260172626387178, "grad_norm": 0.04509197920560837, "learning_rate": 0.01, "loss": 2.0047, "step": 38208 }, { "epoch": 3.9263255240443895, "grad_norm": 0.04409080743789673, "learning_rate": 0.01, "loss": 1.9804, "step": 38211 }, { "epoch": 3.9266337854500617, "grad_norm": 0.044174257665872574, "learning_rate": 0.01, "loss": 1.9897, "step": 38214 }, { "epoch": 3.926942046855734, "grad_norm": 0.05122366175055504, "learning_rate": 0.01, "loss": 1.9924, "step": 38217 }, { "epoch": 3.9272503082614056, "grad_norm": 0.12582433223724365, "learning_rate": 0.01, "loss": 1.9903, "step": 38220 }, { "epoch": 3.927558569667078, "grad_norm": 0.060516782104969025, "learning_rate": 0.01, "loss": 2.0088, "step": 38223 }, { "epoch": 3.9278668310727496, "grad_norm": 0.04210484027862549, "learning_rate": 0.01, "loss": 2.0051, "step": 38226 }, { "epoch": 3.928175092478422, "grad_norm": 0.043509434908628464, "learning_rate": 0.01, "loss": 1.9963, "step": 38229 }, { "epoch": 3.9284833538840935, "grad_norm": 0.03601152077317238, "learning_rate": 0.01, "loss": 1.9945, "step": 38232 }, { "epoch": 3.9287916152897657, "grad_norm": 0.04590906575322151, "learning_rate": 0.01, "loss": 2.01, "step": 38235 }, { "epoch": 3.929099876695438, "grad_norm": 0.06336984038352966, "learning_rate": 0.01, "loss": 1.9757, "step": 38238 }, { "epoch": 3.9294081381011097, "grad_norm": 0.07014600187540054, "learning_rate": 0.01, "loss": 1.9891, "step": 38241 }, { "epoch": 3.929716399506782, "grad_norm": 0.07760234922170639, "learning_rate": 0.01, "loss": 2.0032, "step": 38244 }, { "epoch": 3.9300246609124536, "grad_norm": 0.1088213101029396, "learning_rate": 0.01, "loss": 2.0149, "step": 38247 }, { "epoch": 3.930332922318126, "grad_norm": 0.04306039586663246, "learning_rate": 0.01, "loss": 2.014, "step": 38250 }, { "epoch": 3.9306411837237976, "grad_norm": 0.057269759476184845, "learning_rate": 0.01, "loss": 1.9873, "step": 38253 }, { "epoch": 3.9309494451294698, "grad_norm": 0.04819104075431824, "learning_rate": 0.01, "loss": 2.011, "step": 38256 }, { "epoch": 3.931257706535142, "grad_norm": 0.038784999400377274, "learning_rate": 0.01, "loss": 1.9981, "step": 38259 }, { "epoch": 3.9315659679408137, "grad_norm": 0.08599941432476044, "learning_rate": 0.01, "loss": 1.9838, "step": 38262 }, { "epoch": 3.931874229346486, "grad_norm": 0.10680285841226578, "learning_rate": 0.01, "loss": 1.9891, "step": 38265 }, { "epoch": 3.932182490752158, "grad_norm": 0.09683788567781448, "learning_rate": 0.01, "loss": 2.0191, "step": 38268 }, { "epoch": 3.93249075215783, "grad_norm": 0.040799640119075775, "learning_rate": 0.01, "loss": 1.9741, "step": 38271 }, { "epoch": 3.9327990135635016, "grad_norm": 0.08265798538923264, "learning_rate": 0.01, "loss": 2.0359, "step": 38274 }, { "epoch": 3.933107274969174, "grad_norm": 0.0628572627902031, "learning_rate": 0.01, "loss": 2.018, "step": 38277 }, { "epoch": 3.933415536374846, "grad_norm": 0.06475923955440521, "learning_rate": 0.01, "loss": 2.0023, "step": 38280 }, { "epoch": 3.9337237977805177, "grad_norm": 0.043740466237068176, "learning_rate": 0.01, "loss": 2.0003, "step": 38283 }, { "epoch": 3.93403205918619, "grad_norm": 0.029720323160290718, "learning_rate": 0.01, "loss": 1.9905, "step": 38286 }, { "epoch": 3.934340320591862, "grad_norm": 0.050941772758960724, "learning_rate": 0.01, "loss": 1.9757, "step": 38289 }, { "epoch": 3.934648581997534, "grad_norm": 0.1068761870265007, "learning_rate": 0.01, "loss": 2.0054, "step": 38292 }, { "epoch": 3.934956843403206, "grad_norm": 0.06410616636276245, "learning_rate": 0.01, "loss": 1.9931, "step": 38295 }, { "epoch": 3.935265104808878, "grad_norm": 0.10598262399435043, "learning_rate": 0.01, "loss": 2.0068, "step": 38298 }, { "epoch": 3.93557336621455, "grad_norm": 0.05296116694808006, "learning_rate": 0.01, "loss": 1.9625, "step": 38301 }, { "epoch": 3.9358816276202218, "grad_norm": 0.04025321081280708, "learning_rate": 0.01, "loss": 2.0119, "step": 38304 }, { "epoch": 3.936189889025894, "grad_norm": 0.11896882951259613, "learning_rate": 0.01, "loss": 2.0066, "step": 38307 }, { "epoch": 3.936498150431566, "grad_norm": 0.07203146070241928, "learning_rate": 0.01, "loss": 1.9781, "step": 38310 }, { "epoch": 3.936806411837238, "grad_norm": 0.05813155323266983, "learning_rate": 0.01, "loss": 1.9872, "step": 38313 }, { "epoch": 3.93711467324291, "grad_norm": 0.0439312644302845, "learning_rate": 0.01, "loss": 1.9977, "step": 38316 }, { "epoch": 3.937422934648582, "grad_norm": 0.05411090329289436, "learning_rate": 0.01, "loss": 2.0366, "step": 38319 }, { "epoch": 3.937731196054254, "grad_norm": 0.035287659615278244, "learning_rate": 0.01, "loss": 2.0147, "step": 38322 }, { "epoch": 3.938039457459926, "grad_norm": 0.08188273012638092, "learning_rate": 0.01, "loss": 1.9825, "step": 38325 }, { "epoch": 3.938347718865598, "grad_norm": 0.06879545003175735, "learning_rate": 0.01, "loss": 2.0037, "step": 38328 }, { "epoch": 3.93865598027127, "grad_norm": 0.06965189427137375, "learning_rate": 0.01, "loss": 1.9757, "step": 38331 }, { "epoch": 3.938964241676942, "grad_norm": 0.07698633521795273, "learning_rate": 0.01, "loss": 1.9823, "step": 38334 }, { "epoch": 3.939272503082614, "grad_norm": 0.06989213079214096, "learning_rate": 0.01, "loss": 1.9765, "step": 38337 }, { "epoch": 3.9395807644882863, "grad_norm": 0.09224730730056763, "learning_rate": 0.01, "loss": 1.9898, "step": 38340 }, { "epoch": 3.939889025893958, "grad_norm": 0.15504223108291626, "learning_rate": 0.01, "loss": 2.0109, "step": 38343 }, { "epoch": 3.94019728729963, "grad_norm": 0.06935428082942963, "learning_rate": 0.01, "loss": 2.0157, "step": 38346 }, { "epoch": 3.940505548705302, "grad_norm": 0.051906321197748184, "learning_rate": 0.01, "loss": 1.9971, "step": 38349 }, { "epoch": 3.940813810110974, "grad_norm": 0.059517163783311844, "learning_rate": 0.01, "loss": 1.9764, "step": 38352 }, { "epoch": 3.941122071516646, "grad_norm": 0.031905319541692734, "learning_rate": 0.01, "loss": 1.9958, "step": 38355 }, { "epoch": 3.941430332922318, "grad_norm": 0.06473187357187271, "learning_rate": 0.01, "loss": 1.9852, "step": 38358 }, { "epoch": 3.9417385943279903, "grad_norm": 0.0756862536072731, "learning_rate": 0.01, "loss": 2.0025, "step": 38361 }, { "epoch": 3.942046855733662, "grad_norm": 0.06513907015323639, "learning_rate": 0.01, "loss": 2.0076, "step": 38364 }, { "epoch": 3.9423551171393343, "grad_norm": 0.0908990353345871, "learning_rate": 0.01, "loss": 1.9868, "step": 38367 }, { "epoch": 3.942663378545006, "grad_norm": 0.06427323073148727, "learning_rate": 0.01, "loss": 1.9848, "step": 38370 }, { "epoch": 3.9429716399506782, "grad_norm": 0.09153769910335541, "learning_rate": 0.01, "loss": 1.9816, "step": 38373 }, { "epoch": 3.94327990135635, "grad_norm": 0.0451352633535862, "learning_rate": 0.01, "loss": 1.9973, "step": 38376 }, { "epoch": 3.943588162762022, "grad_norm": 0.06844813376665115, "learning_rate": 0.01, "loss": 1.9987, "step": 38379 }, { "epoch": 3.9438964241676944, "grad_norm": 0.07901204377412796, "learning_rate": 0.01, "loss": 2.0011, "step": 38382 }, { "epoch": 3.944204685573366, "grad_norm": 0.08094186335802078, "learning_rate": 0.01, "loss": 2.0076, "step": 38385 }, { "epoch": 3.9445129469790383, "grad_norm": 0.09053739905357361, "learning_rate": 0.01, "loss": 1.9936, "step": 38388 }, { "epoch": 3.9448212083847105, "grad_norm": 0.04751008749008179, "learning_rate": 0.01, "loss": 1.993, "step": 38391 }, { "epoch": 3.9451294697903823, "grad_norm": 0.042345285415649414, "learning_rate": 0.01, "loss": 1.9857, "step": 38394 }, { "epoch": 3.945437731196054, "grad_norm": 0.1027415469288826, "learning_rate": 0.01, "loss": 1.9761, "step": 38397 }, { "epoch": 3.945745992601726, "grad_norm": 0.12780915200710297, "learning_rate": 0.01, "loss": 2.0122, "step": 38400 }, { "epoch": 3.9460542540073984, "grad_norm": 0.0731390118598938, "learning_rate": 0.01, "loss": 2.0005, "step": 38403 }, { "epoch": 3.94636251541307, "grad_norm": 0.07617928087711334, "learning_rate": 0.01, "loss": 2.0131, "step": 38406 }, { "epoch": 3.9466707768187423, "grad_norm": 0.049755457788705826, "learning_rate": 0.01, "loss": 1.9756, "step": 38409 }, { "epoch": 3.9469790382244145, "grad_norm": 0.043812233954668045, "learning_rate": 0.01, "loss": 1.9935, "step": 38412 }, { "epoch": 3.9472872996300863, "grad_norm": 0.07829032093286514, "learning_rate": 0.01, "loss": 2.0004, "step": 38415 }, { "epoch": 3.9475955610357585, "grad_norm": 0.056485142558813095, "learning_rate": 0.01, "loss": 2.0117, "step": 38418 }, { "epoch": 3.9479038224414302, "grad_norm": 0.09569665789604187, "learning_rate": 0.01, "loss": 1.9906, "step": 38421 }, { "epoch": 3.9482120838471024, "grad_norm": 0.07466506212949753, "learning_rate": 0.01, "loss": 2.0046, "step": 38424 }, { "epoch": 3.948520345252774, "grad_norm": 0.08420269191265106, "learning_rate": 0.01, "loss": 2.0009, "step": 38427 }, { "epoch": 3.9488286066584464, "grad_norm": 0.05188721418380737, "learning_rate": 0.01, "loss": 2.0409, "step": 38430 }, { "epoch": 3.9491368680641186, "grad_norm": 0.042890697717666626, "learning_rate": 0.01, "loss": 1.9798, "step": 38433 }, { "epoch": 3.9494451294697903, "grad_norm": 0.05752531811594963, "learning_rate": 0.01, "loss": 1.975, "step": 38436 }, { "epoch": 3.9497533908754625, "grad_norm": 0.0403323695063591, "learning_rate": 0.01, "loss": 1.9951, "step": 38439 }, { "epoch": 3.9500616522811343, "grad_norm": 0.035122547298669815, "learning_rate": 0.01, "loss": 1.9995, "step": 38442 }, { "epoch": 3.9503699136868065, "grad_norm": 0.04063395410776138, "learning_rate": 0.01, "loss": 2.0264, "step": 38445 }, { "epoch": 3.950678175092478, "grad_norm": 0.05300283804535866, "learning_rate": 0.01, "loss": 2.0159, "step": 38448 }, { "epoch": 3.9509864364981504, "grad_norm": 0.06941550225019455, "learning_rate": 0.01, "loss": 2.0155, "step": 38451 }, { "epoch": 3.9512946979038226, "grad_norm": 0.04268152639269829, "learning_rate": 0.01, "loss": 2.0116, "step": 38454 }, { "epoch": 3.9516029593094943, "grad_norm": 0.058871544897556305, "learning_rate": 0.01, "loss": 1.9888, "step": 38457 }, { "epoch": 3.9519112207151665, "grad_norm": 0.0643804594874382, "learning_rate": 0.01, "loss": 1.9971, "step": 38460 }, { "epoch": 3.9522194821208387, "grad_norm": 0.04509326070547104, "learning_rate": 0.01, "loss": 2.0231, "step": 38463 }, { "epoch": 3.9525277435265105, "grad_norm": 0.04596889764070511, "learning_rate": 0.01, "loss": 1.9926, "step": 38466 }, { "epoch": 3.9528360049321822, "grad_norm": 0.08639726787805557, "learning_rate": 0.01, "loss": 1.973, "step": 38469 }, { "epoch": 3.9531442663378544, "grad_norm": 0.08404930680990219, "learning_rate": 0.01, "loss": 1.9867, "step": 38472 }, { "epoch": 3.9534525277435266, "grad_norm": 0.09206783026456833, "learning_rate": 0.01, "loss": 1.968, "step": 38475 }, { "epoch": 3.9537607891491984, "grad_norm": 0.041316352784633636, "learning_rate": 0.01, "loss": 1.9764, "step": 38478 }, { "epoch": 3.9540690505548706, "grad_norm": 0.07000883668661118, "learning_rate": 0.01, "loss": 2.0071, "step": 38481 }, { "epoch": 3.9543773119605428, "grad_norm": 0.048716410994529724, "learning_rate": 0.01, "loss": 1.9984, "step": 38484 }, { "epoch": 3.9546855733662145, "grad_norm": 0.05251453444361687, "learning_rate": 0.01, "loss": 2.0007, "step": 38487 }, { "epoch": 3.9549938347718867, "grad_norm": 0.04517770931124687, "learning_rate": 0.01, "loss": 1.9998, "step": 38490 }, { "epoch": 3.9553020961775585, "grad_norm": 0.07227790355682373, "learning_rate": 0.01, "loss": 1.9965, "step": 38493 }, { "epoch": 3.9556103575832307, "grad_norm": 0.09953956305980682, "learning_rate": 0.01, "loss": 1.9875, "step": 38496 }, { "epoch": 3.9559186189889024, "grad_norm": 0.06194831430912018, "learning_rate": 0.01, "loss": 2.0008, "step": 38499 }, { "epoch": 3.9562268803945746, "grad_norm": 0.10566883534193039, "learning_rate": 0.01, "loss": 1.9839, "step": 38502 }, { "epoch": 3.956535141800247, "grad_norm": 0.04950516298413277, "learning_rate": 0.01, "loss": 1.9868, "step": 38505 }, { "epoch": 3.9568434032059185, "grad_norm": 0.04657790809869766, "learning_rate": 0.01, "loss": 1.9794, "step": 38508 }, { "epoch": 3.9571516646115907, "grad_norm": 0.05642826855182648, "learning_rate": 0.01, "loss": 1.9739, "step": 38511 }, { "epoch": 3.9574599260172625, "grad_norm": 0.049003373831510544, "learning_rate": 0.01, "loss": 1.9892, "step": 38514 }, { "epoch": 3.9577681874229347, "grad_norm": 0.05019281804561615, "learning_rate": 0.01, "loss": 1.9758, "step": 38517 }, { "epoch": 3.9580764488286064, "grad_norm": 0.05060233548283577, "learning_rate": 0.01, "loss": 2.0093, "step": 38520 }, { "epoch": 3.9583847102342786, "grad_norm": 0.051873739808797836, "learning_rate": 0.01, "loss": 1.998, "step": 38523 }, { "epoch": 3.958692971639951, "grad_norm": 0.11860267072916031, "learning_rate": 0.01, "loss": 1.9988, "step": 38526 }, { "epoch": 3.9590012330456226, "grad_norm": 0.03894282132387161, "learning_rate": 0.01, "loss": 2.0014, "step": 38529 }, { "epoch": 3.9593094944512948, "grad_norm": 0.0839400663971901, "learning_rate": 0.01, "loss": 1.9988, "step": 38532 }, { "epoch": 3.959617755856967, "grad_norm": 0.10270547866821289, "learning_rate": 0.01, "loss": 1.9829, "step": 38535 }, { "epoch": 3.9599260172626387, "grad_norm": 0.07309349626302719, "learning_rate": 0.01, "loss": 2.0124, "step": 38538 }, { "epoch": 3.9602342786683105, "grad_norm": 0.03441464155912399, "learning_rate": 0.01, "loss": 2.0036, "step": 38541 }, { "epoch": 3.9605425400739827, "grad_norm": 0.04434891417622566, "learning_rate": 0.01, "loss": 1.9907, "step": 38544 }, { "epoch": 3.960850801479655, "grad_norm": 0.030151626095175743, "learning_rate": 0.01, "loss": 1.9991, "step": 38547 }, { "epoch": 3.9611590628853266, "grad_norm": 0.03249426558613777, "learning_rate": 0.01, "loss": 1.9883, "step": 38550 }, { "epoch": 3.961467324290999, "grad_norm": 0.06774075329303741, "learning_rate": 0.01, "loss": 1.9894, "step": 38553 }, { "epoch": 3.961775585696671, "grad_norm": 0.1510474681854248, "learning_rate": 0.01, "loss": 1.989, "step": 38556 }, { "epoch": 3.9620838471023427, "grad_norm": 0.080832839012146, "learning_rate": 0.01, "loss": 2.0033, "step": 38559 }, { "epoch": 3.962392108508015, "grad_norm": 0.08914501219987869, "learning_rate": 0.01, "loss": 2.0032, "step": 38562 }, { "epoch": 3.9627003699136867, "grad_norm": 0.10509749501943588, "learning_rate": 0.01, "loss": 1.9611, "step": 38565 }, { "epoch": 3.963008631319359, "grad_norm": 0.0980307012796402, "learning_rate": 0.01, "loss": 2.006, "step": 38568 }, { "epoch": 3.9633168927250306, "grad_norm": 0.04917595908045769, "learning_rate": 0.01, "loss": 2.0061, "step": 38571 }, { "epoch": 3.963625154130703, "grad_norm": 0.04090237617492676, "learning_rate": 0.01, "loss": 2.0112, "step": 38574 }, { "epoch": 3.963933415536375, "grad_norm": 0.06782566010951996, "learning_rate": 0.01, "loss": 2.0158, "step": 38577 }, { "epoch": 3.9642416769420468, "grad_norm": 0.04495246335864067, "learning_rate": 0.01, "loss": 1.9784, "step": 38580 }, { "epoch": 3.964549938347719, "grad_norm": 0.11113790422677994, "learning_rate": 0.01, "loss": 1.9719, "step": 38583 }, { "epoch": 3.9648581997533907, "grad_norm": 0.09196839481592178, "learning_rate": 0.01, "loss": 1.997, "step": 38586 }, { "epoch": 3.965166461159063, "grad_norm": 0.07392636686563492, "learning_rate": 0.01, "loss": 2.0102, "step": 38589 }, { "epoch": 3.9654747225647347, "grad_norm": 0.0521097257733345, "learning_rate": 0.01, "loss": 1.983, "step": 38592 }, { "epoch": 3.965782983970407, "grad_norm": 0.05052189901471138, "learning_rate": 0.01, "loss": 1.9929, "step": 38595 }, { "epoch": 3.966091245376079, "grad_norm": 0.08592119067907333, "learning_rate": 0.01, "loss": 1.9949, "step": 38598 }, { "epoch": 3.966399506781751, "grad_norm": 0.08535821735858917, "learning_rate": 0.01, "loss": 1.9743, "step": 38601 }, { "epoch": 3.966707768187423, "grad_norm": 0.06658685207366943, "learning_rate": 0.01, "loss": 2.0038, "step": 38604 }, { "epoch": 3.967016029593095, "grad_norm": 0.04116528108716011, "learning_rate": 0.01, "loss": 1.969, "step": 38607 }, { "epoch": 3.967324290998767, "grad_norm": 0.04124008119106293, "learning_rate": 0.01, "loss": 1.9889, "step": 38610 }, { "epoch": 3.967632552404439, "grad_norm": 0.047569263726472855, "learning_rate": 0.01, "loss": 1.9848, "step": 38613 }, { "epoch": 3.967940813810111, "grad_norm": 0.04800506308674812, "learning_rate": 0.01, "loss": 2.0008, "step": 38616 }, { "epoch": 3.968249075215783, "grad_norm": 0.06227673590183258, "learning_rate": 0.01, "loss": 2.0099, "step": 38619 }, { "epoch": 3.968557336621455, "grad_norm": 0.036228880286216736, "learning_rate": 0.01, "loss": 1.9945, "step": 38622 }, { "epoch": 3.968865598027127, "grad_norm": 0.03745630383491516, "learning_rate": 0.01, "loss": 1.9916, "step": 38625 }, { "epoch": 3.969173859432799, "grad_norm": 0.08907367289066315, "learning_rate": 0.01, "loss": 2.0278, "step": 38628 }, { "epoch": 3.969482120838471, "grad_norm": 0.054506488144397736, "learning_rate": 0.01, "loss": 2.0001, "step": 38631 }, { "epoch": 3.969790382244143, "grad_norm": 0.04185614362359047, "learning_rate": 0.01, "loss": 1.9847, "step": 38634 }, { "epoch": 3.970098643649815, "grad_norm": 0.04917627200484276, "learning_rate": 0.01, "loss": 2.0155, "step": 38637 }, { "epoch": 3.970406905055487, "grad_norm": 0.040258195251226425, "learning_rate": 0.01, "loss": 2.0119, "step": 38640 }, { "epoch": 3.970715166461159, "grad_norm": 0.08127589523792267, "learning_rate": 0.01, "loss": 1.9753, "step": 38643 }, { "epoch": 3.971023427866831, "grad_norm": 0.07298692315816879, "learning_rate": 0.01, "loss": 1.9958, "step": 38646 }, { "epoch": 3.9713316892725032, "grad_norm": 0.09801699966192245, "learning_rate": 0.01, "loss": 1.9747, "step": 38649 }, { "epoch": 3.971639950678175, "grad_norm": 0.10016069561243057, "learning_rate": 0.01, "loss": 1.9986, "step": 38652 }, { "epoch": 3.971948212083847, "grad_norm": 0.038826216012239456, "learning_rate": 0.01, "loss": 1.9768, "step": 38655 }, { "epoch": 3.9722564734895194, "grad_norm": 0.04124876856803894, "learning_rate": 0.01, "loss": 1.977, "step": 38658 }, { "epoch": 3.972564734895191, "grad_norm": 0.03699329122900963, "learning_rate": 0.01, "loss": 2.0101, "step": 38661 }, { "epoch": 3.972872996300863, "grad_norm": 0.03506563603878021, "learning_rate": 0.01, "loss": 2.0035, "step": 38664 }, { "epoch": 3.973181257706535, "grad_norm": 0.08274342864751816, "learning_rate": 0.01, "loss": 1.9819, "step": 38667 }, { "epoch": 3.9734895191122073, "grad_norm": 0.10225984454154968, "learning_rate": 0.01, "loss": 2.0115, "step": 38670 }, { "epoch": 3.973797780517879, "grad_norm": 0.08367688208818436, "learning_rate": 0.01, "loss": 1.9809, "step": 38673 }, { "epoch": 3.974106041923551, "grad_norm": 0.05682690069079399, "learning_rate": 0.01, "loss": 2.0012, "step": 38676 }, { "epoch": 3.9744143033292234, "grad_norm": 0.03980601951479912, "learning_rate": 0.01, "loss": 1.9668, "step": 38679 }, { "epoch": 3.974722564734895, "grad_norm": 0.03310983628034592, "learning_rate": 0.01, "loss": 1.9976, "step": 38682 }, { "epoch": 3.9750308261405674, "grad_norm": 0.05037367716431618, "learning_rate": 0.01, "loss": 1.9845, "step": 38685 }, { "epoch": 3.975339087546239, "grad_norm": 0.09068721532821655, "learning_rate": 0.01, "loss": 1.9848, "step": 38688 }, { "epoch": 3.9756473489519113, "grad_norm": 0.10251244902610779, "learning_rate": 0.01, "loss": 1.9981, "step": 38691 }, { "epoch": 3.975955610357583, "grad_norm": 0.04860818758606911, "learning_rate": 0.01, "loss": 2.0073, "step": 38694 }, { "epoch": 3.9762638717632552, "grad_norm": 0.0460125096142292, "learning_rate": 0.01, "loss": 1.9925, "step": 38697 }, { "epoch": 3.9765721331689274, "grad_norm": 0.03295229375362396, "learning_rate": 0.01, "loss": 1.9682, "step": 38700 }, { "epoch": 3.976880394574599, "grad_norm": 0.0434846356511116, "learning_rate": 0.01, "loss": 1.9944, "step": 38703 }, { "epoch": 3.9771886559802714, "grad_norm": 0.04077495262026787, "learning_rate": 0.01, "loss": 2.0107, "step": 38706 }, { "epoch": 3.977496917385943, "grad_norm": 0.08263571560382843, "learning_rate": 0.01, "loss": 2.0125, "step": 38709 }, { "epoch": 3.9778051787916153, "grad_norm": 0.1033141016960144, "learning_rate": 0.01, "loss": 2.02, "step": 38712 }, { "epoch": 3.978113440197287, "grad_norm": 0.12253855168819427, "learning_rate": 0.01, "loss": 2.0039, "step": 38715 }, { "epoch": 3.9784217016029593, "grad_norm": 0.08476614207029343, "learning_rate": 0.01, "loss": 1.974, "step": 38718 }, { "epoch": 3.9787299630086315, "grad_norm": 0.08248502016067505, "learning_rate": 0.01, "loss": 1.9934, "step": 38721 }, { "epoch": 3.979038224414303, "grad_norm": 0.06338318437337875, "learning_rate": 0.01, "loss": 1.9632, "step": 38724 }, { "epoch": 3.9793464858199754, "grad_norm": 0.061125461012125015, "learning_rate": 0.01, "loss": 1.9902, "step": 38727 }, { "epoch": 3.9796547472256476, "grad_norm": 0.04191330447793007, "learning_rate": 0.01, "loss": 1.9837, "step": 38730 }, { "epoch": 3.9799630086313194, "grad_norm": 0.04181262478232384, "learning_rate": 0.01, "loss": 1.9952, "step": 38733 }, { "epoch": 3.980271270036991, "grad_norm": 0.054846856743097305, "learning_rate": 0.01, "loss": 2.0109, "step": 38736 }, { "epoch": 3.9805795314426633, "grad_norm": 0.1322845071554184, "learning_rate": 0.01, "loss": 1.9626, "step": 38739 }, { "epoch": 3.9808877928483355, "grad_norm": 0.06795060634613037, "learning_rate": 0.01, "loss": 1.9972, "step": 38742 }, { "epoch": 3.9811960542540072, "grad_norm": 0.04729272425174713, "learning_rate": 0.01, "loss": 2.0027, "step": 38745 }, { "epoch": 3.9815043156596794, "grad_norm": 0.05951160192489624, "learning_rate": 0.01, "loss": 2.0016, "step": 38748 }, { "epoch": 3.9818125770653516, "grad_norm": 0.14003396034240723, "learning_rate": 0.01, "loss": 2.0166, "step": 38751 }, { "epoch": 3.9821208384710234, "grad_norm": 0.04996626079082489, "learning_rate": 0.01, "loss": 2.002, "step": 38754 }, { "epoch": 3.9824290998766956, "grad_norm": 0.08134348690509796, "learning_rate": 0.01, "loss": 2.0159, "step": 38757 }, { "epoch": 3.9827373612823673, "grad_norm": 0.04592986777424812, "learning_rate": 0.01, "loss": 2.0107, "step": 38760 }, { "epoch": 3.9830456226880395, "grad_norm": 0.06769111007452011, "learning_rate": 0.01, "loss": 1.998, "step": 38763 }, { "epoch": 3.9833538840937113, "grad_norm": 0.04501201957464218, "learning_rate": 0.01, "loss": 2.0177, "step": 38766 }, { "epoch": 3.9836621454993835, "grad_norm": 0.07144643366336823, "learning_rate": 0.01, "loss": 1.9948, "step": 38769 }, { "epoch": 3.9839704069050557, "grad_norm": 0.05670906975865364, "learning_rate": 0.01, "loss": 2.0011, "step": 38772 }, { "epoch": 3.9842786683107274, "grad_norm": 0.03870624676346779, "learning_rate": 0.01, "loss": 2.0089, "step": 38775 }, { "epoch": 3.9845869297163996, "grad_norm": 0.08053532242774963, "learning_rate": 0.01, "loss": 1.9955, "step": 38778 }, { "epoch": 3.9848951911220714, "grad_norm": 0.03753774240612984, "learning_rate": 0.01, "loss": 1.9866, "step": 38781 }, { "epoch": 3.9852034525277436, "grad_norm": 0.04568708315491676, "learning_rate": 0.01, "loss": 1.9811, "step": 38784 }, { "epoch": 3.9855117139334153, "grad_norm": 0.03626095503568649, "learning_rate": 0.01, "loss": 1.995, "step": 38787 }, { "epoch": 3.9858199753390875, "grad_norm": 0.04620308801531792, "learning_rate": 0.01, "loss": 2.0172, "step": 38790 }, { "epoch": 3.9861282367447597, "grad_norm": 0.05767418071627617, "learning_rate": 0.01, "loss": 1.9905, "step": 38793 }, { "epoch": 3.9864364981504314, "grad_norm": 0.04969481751322746, "learning_rate": 0.01, "loss": 1.9795, "step": 38796 }, { "epoch": 3.9867447595561036, "grad_norm": 0.0532878078520298, "learning_rate": 0.01, "loss": 1.9875, "step": 38799 }, { "epoch": 3.987053020961776, "grad_norm": 0.07379619777202606, "learning_rate": 0.01, "loss": 2.0124, "step": 38802 }, { "epoch": 3.9873612823674476, "grad_norm": 0.07538430392742157, "learning_rate": 0.01, "loss": 2.0105, "step": 38805 }, { "epoch": 3.9876695437731193, "grad_norm": 0.0686052069067955, "learning_rate": 0.01, "loss": 2.0059, "step": 38808 }, { "epoch": 3.9879778051787915, "grad_norm": 0.08726216852664948, "learning_rate": 0.01, "loss": 1.9803, "step": 38811 }, { "epoch": 3.9882860665844637, "grad_norm": 0.08535965532064438, "learning_rate": 0.01, "loss": 1.9997, "step": 38814 }, { "epoch": 3.9885943279901355, "grad_norm": 0.046852223575115204, "learning_rate": 0.01, "loss": 1.982, "step": 38817 }, { "epoch": 3.9889025893958077, "grad_norm": 0.06200144812464714, "learning_rate": 0.01, "loss": 2.0423, "step": 38820 }, { "epoch": 3.98921085080148, "grad_norm": 0.03675130382180214, "learning_rate": 0.01, "loss": 2.0003, "step": 38823 }, { "epoch": 3.9895191122071516, "grad_norm": 0.054221536964178085, "learning_rate": 0.01, "loss": 2.0149, "step": 38826 }, { "epoch": 3.989827373612824, "grad_norm": 0.0411151684820652, "learning_rate": 0.01, "loss": 2.0047, "step": 38829 }, { "epoch": 3.9901356350184956, "grad_norm": 0.03962259367108345, "learning_rate": 0.01, "loss": 2.01, "step": 38832 }, { "epoch": 3.9904438964241677, "grad_norm": 0.04097359627485275, "learning_rate": 0.01, "loss": 1.9974, "step": 38835 }, { "epoch": 3.9907521578298395, "grad_norm": 0.13092494010925293, "learning_rate": 0.01, "loss": 1.9936, "step": 38838 }, { "epoch": 3.9910604192355117, "grad_norm": 0.03308350592851639, "learning_rate": 0.01, "loss": 1.9941, "step": 38841 }, { "epoch": 3.991368680641184, "grad_norm": 0.06447092443704605, "learning_rate": 0.01, "loss": 2.0022, "step": 38844 }, { "epoch": 3.9916769420468556, "grad_norm": 0.06456363946199417, "learning_rate": 0.01, "loss": 2.0051, "step": 38847 }, { "epoch": 3.991985203452528, "grad_norm": 0.04208895191550255, "learning_rate": 0.01, "loss": 2.01, "step": 38850 }, { "epoch": 3.9922934648582, "grad_norm": 0.03307265415787697, "learning_rate": 0.01, "loss": 1.9962, "step": 38853 }, { "epoch": 3.9926017262638718, "grad_norm": 0.06227843463420868, "learning_rate": 0.01, "loss": 1.9864, "step": 38856 }, { "epoch": 3.9929099876695435, "grad_norm": 0.1568191796541214, "learning_rate": 0.01, "loss": 1.9979, "step": 38859 }, { "epoch": 3.9932182490752157, "grad_norm": 0.05974143370985985, "learning_rate": 0.01, "loss": 2.0018, "step": 38862 }, { "epoch": 3.993526510480888, "grad_norm": 0.07882939279079437, "learning_rate": 0.01, "loss": 2.0102, "step": 38865 }, { "epoch": 3.9938347718865597, "grad_norm": 0.053341448307037354, "learning_rate": 0.01, "loss": 2.0123, "step": 38868 }, { "epoch": 3.994143033292232, "grad_norm": 0.0673101395368576, "learning_rate": 0.01, "loss": 2.0183, "step": 38871 }, { "epoch": 3.994451294697904, "grad_norm": 0.06340447813272476, "learning_rate": 0.01, "loss": 1.9966, "step": 38874 }, { "epoch": 3.994759556103576, "grad_norm": 0.06743529438972473, "learning_rate": 0.01, "loss": 2.0047, "step": 38877 }, { "epoch": 3.995067817509248, "grad_norm": 0.044702883809804916, "learning_rate": 0.01, "loss": 1.9927, "step": 38880 }, { "epoch": 3.9953760789149197, "grad_norm": 0.038102682679891586, "learning_rate": 0.01, "loss": 2.0005, "step": 38883 }, { "epoch": 3.995684340320592, "grad_norm": 0.0471016988158226, "learning_rate": 0.01, "loss": 1.9687, "step": 38886 }, { "epoch": 3.9959926017262637, "grad_norm": 0.10289987921714783, "learning_rate": 0.01, "loss": 2.0056, "step": 38889 }, { "epoch": 3.996300863131936, "grad_norm": 0.08012085407972336, "learning_rate": 0.01, "loss": 2.011, "step": 38892 }, { "epoch": 3.996609124537608, "grad_norm": 0.07357537001371384, "learning_rate": 0.01, "loss": 1.9837, "step": 38895 }, { "epoch": 3.99691738594328, "grad_norm": 0.08909714221954346, "learning_rate": 0.01, "loss": 2.0116, "step": 38898 }, { "epoch": 3.997225647348952, "grad_norm": 0.044727593660354614, "learning_rate": 0.01, "loss": 2.0027, "step": 38901 }, { "epoch": 3.9975339087546238, "grad_norm": 0.039593808352947235, "learning_rate": 0.01, "loss": 2.0071, "step": 38904 }, { "epoch": 3.997842170160296, "grad_norm": 0.03478686884045601, "learning_rate": 0.01, "loss": 2.0185, "step": 38907 }, { "epoch": 3.9981504315659677, "grad_norm": 0.09729648381471634, "learning_rate": 0.01, "loss": 2.0008, "step": 38910 }, { "epoch": 3.99845869297164, "grad_norm": 0.12499833852052689, "learning_rate": 0.01, "loss": 1.9861, "step": 38913 }, { "epoch": 3.998766954377312, "grad_norm": 0.10060276091098785, "learning_rate": 0.01, "loss": 1.9634, "step": 38916 }, { "epoch": 3.999075215782984, "grad_norm": 0.060861632227897644, "learning_rate": 0.01, "loss": 1.9902, "step": 38919 }, { "epoch": 3.999383477188656, "grad_norm": 0.058074526488780975, "learning_rate": 0.01, "loss": 1.9935, "step": 38922 }, { "epoch": 3.9996917385943282, "grad_norm": 0.08413925021886826, "learning_rate": 0.01, "loss": 1.9958, "step": 38925 }, { "epoch": 4.0, "grad_norm": 0.06637567281723022, "learning_rate": 0.01, "loss": 1.978, "step": 38928 } ], "logging_steps": 3, "max_steps": 38928, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9712316946488503e+21, "train_batch_size": 18, "trial_name": null, "trial_params": null }