{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999003686360466, "eval_steps": 500, "global_step": 2509, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003985254558134901, "grad_norm": 1.1682030229557891, "learning_rate": 1.5384615384615387e-06, "loss": 1.298, "step": 1 }, { "epoch": 0.0007970509116269802, "grad_norm": 1.1450089305489202, "learning_rate": 3.0769230769230774e-06, "loss": 1.2606, "step": 2 }, { "epoch": 0.0011955763674404703, "grad_norm": 1.1741081208500113, "learning_rate": 4.615384615384616e-06, "loss": 1.317, "step": 3 }, { "epoch": 0.0015941018232539603, "grad_norm": 0.9416899998464173, "learning_rate": 6.153846153846155e-06, "loss": 1.3244, "step": 4 }, { "epoch": 0.0019926272790674504, "grad_norm": 1.17272634152426, "learning_rate": 7.692307692307694e-06, "loss": 1.2691, "step": 5 }, { "epoch": 0.0023911527348809405, "grad_norm": 0.8938096517547656, "learning_rate": 9.230769230769232e-06, "loss": 1.3528, "step": 6 }, { "epoch": 0.0027896781906944306, "grad_norm": 1.4781168610568196, "learning_rate": 1.076923076923077e-05, "loss": 1.3365, "step": 7 }, { "epoch": 0.0031882036465079207, "grad_norm": 1.323556020239157, "learning_rate": 1.230769230769231e-05, "loss": 1.3375, "step": 8 }, { "epoch": 0.0035867291023214108, "grad_norm": 1.9185267488446602, "learning_rate": 1.3846153846153847e-05, "loss": 1.2539, "step": 9 }, { "epoch": 0.003985254558134901, "grad_norm": 1.674769393300418, "learning_rate": 1.5384615384615387e-05, "loss": 1.2846, "step": 10 }, { "epoch": 0.004383780013948391, "grad_norm": 1.509254464009656, "learning_rate": 1.6923076923076924e-05, "loss": 1.2827, "step": 11 }, { "epoch": 0.004782305469761881, "grad_norm": 1.3888982819984244, "learning_rate": 1.8461538461538465e-05, "loss": 1.323, "step": 12 }, { "epoch": 0.0051808309255753715, "grad_norm": 1.5318573252337477, "learning_rate": 2e-05, "loss": 1.2514, "step": 13 }, { "epoch": 0.005579356381388861, "grad_norm": 1.7716074190442104, "learning_rate": 2.153846153846154e-05, "loss": 1.224, "step": 14 }, { "epoch": 0.005977881837202352, "grad_norm": 1.5774334632902784, "learning_rate": 2.3076923076923076e-05, "loss": 1.2513, "step": 15 }, { "epoch": 0.006376407293015841, "grad_norm": 2.079535848411662, "learning_rate": 2.461538461538462e-05, "loss": 1.2324, "step": 16 }, { "epoch": 0.006774932748829332, "grad_norm": 1.6224827783116045, "learning_rate": 2.6153846153846157e-05, "loss": 1.2437, "step": 17 }, { "epoch": 0.0071734582046428215, "grad_norm": 1.731628237386042, "learning_rate": 2.7692307692307694e-05, "loss": 1.1941, "step": 18 }, { "epoch": 0.007571983660456312, "grad_norm": 1.6486789158728468, "learning_rate": 2.923076923076923e-05, "loss": 1.2157, "step": 19 }, { "epoch": 0.007970509116269802, "grad_norm": 1.5878767170549857, "learning_rate": 3.0769230769230774e-05, "loss": 1.2282, "step": 20 }, { "epoch": 0.008369034572083291, "grad_norm": 1.769036375459327, "learning_rate": 3.230769230769231e-05, "loss": 1.2276, "step": 21 }, { "epoch": 0.008767560027896783, "grad_norm": 1.6262212305434318, "learning_rate": 3.384615384615385e-05, "loss": 1.1966, "step": 22 }, { "epoch": 0.009166085483710272, "grad_norm": 1.6178408127403725, "learning_rate": 3.538461538461539e-05, "loss": 1.1681, "step": 23 }, { "epoch": 0.009564610939523762, "grad_norm": 1.8576583132326376, "learning_rate": 3.692307692307693e-05, "loss": 1.1733, "step": 24 }, { "epoch": 0.009963136395337252, "grad_norm": 2.10735796807257, "learning_rate": 3.846153846153846e-05, "loss": 1.1781, "step": 25 }, { "epoch": 0.010361661851150743, "grad_norm": 2.061441058129094, "learning_rate": 4e-05, "loss": 1.1766, "step": 26 }, { "epoch": 0.010760187306964233, "grad_norm": 1.689955130193812, "learning_rate": 3.9999983991661895e-05, "loss": 1.2193, "step": 27 }, { "epoch": 0.011158712762777722, "grad_norm": 2.089801173287961, "learning_rate": 3.99999359666732e-05, "loss": 1.1864, "step": 28 }, { "epoch": 0.011557238218591212, "grad_norm": 2.1810170181408584, "learning_rate": 3.999985592511079e-05, "loss": 1.1981, "step": 29 }, { "epoch": 0.011955763674404703, "grad_norm": 1.76919939388419, "learning_rate": 3.999974386710281e-05, "loss": 1.0961, "step": 30 }, { "epoch": 0.012354289130218193, "grad_norm": 1.7129497533016933, "learning_rate": 3.999959979282864e-05, "loss": 1.1348, "step": 31 }, { "epoch": 0.012752814586031683, "grad_norm": 1.23643910459474, "learning_rate": 3.999942370251891e-05, "loss": 1.1678, "step": 32 }, { "epoch": 0.013151340041845172, "grad_norm": 1.9739746593085052, "learning_rate": 3.999921559645554e-05, "loss": 1.1677, "step": 33 }, { "epoch": 0.013549865497658664, "grad_norm": 1.2119052688289602, "learning_rate": 3.9998975474971644e-05, "loss": 1.1073, "step": 34 }, { "epoch": 0.013948390953472153, "grad_norm": 2.2576004723914758, "learning_rate": 3.999870333845162e-05, "loss": 1.1745, "step": 35 }, { "epoch": 0.014346916409285643, "grad_norm": 1.0352975506141129, "learning_rate": 3.9998399187331125e-05, "loss": 1.1283, "step": 36 }, { "epoch": 0.014745441865099133, "grad_norm": 2.0996501651362243, "learning_rate": 3.999806302209705e-05, "loss": 1.1212, "step": 37 }, { "epoch": 0.015143967320912624, "grad_norm": 1.225013558748945, "learning_rate": 3.9997694843287546e-05, "loss": 1.1209, "step": 38 }, { "epoch": 0.015542492776726114, "grad_norm": 1.764970024494282, "learning_rate": 3.999729465149199e-05, "loss": 1.1445, "step": 39 }, { "epoch": 0.015941018232539603, "grad_norm": 1.4826459735644733, "learning_rate": 3.999686244735103e-05, "loss": 1.1341, "step": 40 }, { "epoch": 0.016339543688353095, "grad_norm": 1.5561072965892055, "learning_rate": 3.9996398231556565e-05, "loss": 1.1582, "step": 41 }, { "epoch": 0.016738069144166583, "grad_norm": 1.659587994848137, "learning_rate": 3.99959020048517e-05, "loss": 1.0567, "step": 42 }, { "epoch": 0.017136594599980074, "grad_norm": 1.4377552352395278, "learning_rate": 3.999537376803085e-05, "loss": 1.1493, "step": 43 }, { "epoch": 0.017535120055793565, "grad_norm": 1.4553863448092164, "learning_rate": 3.99948135219396e-05, "loss": 1.135, "step": 44 }, { "epoch": 0.017933645511607053, "grad_norm": 1.2714080589572554, "learning_rate": 3.9994221267474826e-05, "loss": 1.1033, "step": 45 }, { "epoch": 0.018332170967420545, "grad_norm": 1.4352549542350836, "learning_rate": 3.9993597005584625e-05, "loss": 1.1441, "step": 46 }, { "epoch": 0.018730696423234033, "grad_norm": 1.355132046439583, "learning_rate": 3.9992940737268344e-05, "loss": 1.1654, "step": 47 }, { "epoch": 0.019129221879047524, "grad_norm": 1.2739353234997868, "learning_rate": 3.9992252463576547e-05, "loss": 1.0932, "step": 48 }, { "epoch": 0.019527747334861015, "grad_norm": 1.5599129273160852, "learning_rate": 3.9991532185611054e-05, "loss": 1.1289, "step": 49 }, { "epoch": 0.019926272790674503, "grad_norm": 1.2761680959247894, "learning_rate": 3.9990779904524915e-05, "loss": 1.1008, "step": 50 }, { "epoch": 0.020324798246487995, "grad_norm": 1.527368858852383, "learning_rate": 3.998999562152239e-05, "loss": 1.0787, "step": 51 }, { "epoch": 0.020723323702301486, "grad_norm": 1.3999003605132498, "learning_rate": 3.9989179337859e-05, "loss": 1.0898, "step": 52 }, { "epoch": 0.021121849158114974, "grad_norm": 1.3815814063131917, "learning_rate": 3.998833105484148e-05, "loss": 1.1101, "step": 53 }, { "epoch": 0.021520374613928465, "grad_norm": 1.2557770729577848, "learning_rate": 3.998745077382779e-05, "loss": 1.069, "step": 54 }, { "epoch": 0.021918900069741953, "grad_norm": 1.3393579953921733, "learning_rate": 3.99865384962271e-05, "loss": 1.0726, "step": 55 }, { "epoch": 0.022317425525555445, "grad_norm": 1.8263793843329788, "learning_rate": 3.998559422349983e-05, "loss": 1.0557, "step": 56 }, { "epoch": 0.022715950981368936, "grad_norm": 1.0325515310273663, "learning_rate": 3.99846179571576e-05, "loss": 1.0813, "step": 57 }, { "epoch": 0.023114476437182424, "grad_norm": 1.660896527304962, "learning_rate": 3.998360969876325e-05, "loss": 1.0583, "step": 58 }, { "epoch": 0.023513001892995915, "grad_norm": 0.9235555725660893, "learning_rate": 3.998256944993083e-05, "loss": 1.0914, "step": 59 }, { "epoch": 0.023911527348809407, "grad_norm": 1.5716895827106996, "learning_rate": 3.99814972123256e-05, "loss": 1.0919, "step": 60 }, { "epoch": 0.024310052804622895, "grad_norm": 1.1583937200957837, "learning_rate": 3.998039298766405e-05, "loss": 1.0255, "step": 61 }, { "epoch": 0.024708578260436386, "grad_norm": 1.7286427351895097, "learning_rate": 3.9979256777713856e-05, "loss": 1.0395, "step": 62 }, { "epoch": 0.025107103716249874, "grad_norm": 1.1208870057484686, "learning_rate": 3.9978088584293894e-05, "loss": 1.0619, "step": 63 }, { "epoch": 0.025505629172063365, "grad_norm": 1.302369859146436, "learning_rate": 3.997688840927425e-05, "loss": 1.0526, "step": 64 }, { "epoch": 0.025904154627876857, "grad_norm": 1.4174940189185974, "learning_rate": 3.997565625457621e-05, "loss": 1.0629, "step": 65 }, { "epoch": 0.026302680083690345, "grad_norm": 1.232886411420502, "learning_rate": 3.9974392122172244e-05, "loss": 1.0289, "step": 66 }, { "epoch": 0.026701205539503836, "grad_norm": 1.3590067350773096, "learning_rate": 3.9973096014086017e-05, "loss": 1.0471, "step": 67 }, { "epoch": 0.027099730995317328, "grad_norm": 1.1328281166100345, "learning_rate": 3.9971767932392386e-05, "loss": 1.0373, "step": 68 }, { "epoch": 0.027498256451130815, "grad_norm": 1.398126297704576, "learning_rate": 3.997040787921739e-05, "loss": 1.01, "step": 69 }, { "epoch": 0.027896781906944307, "grad_norm": 1.121173880074476, "learning_rate": 3.996901585673824e-05, "loss": 1.0509, "step": 70 }, { "epoch": 0.028295307362757795, "grad_norm": 1.1562605633658927, "learning_rate": 3.996759186718334e-05, "loss": 1.0394, "step": 71 }, { "epoch": 0.028693832818571286, "grad_norm": 1.563700864160097, "learning_rate": 3.996613591283226e-05, "loss": 1.0338, "step": 72 }, { "epoch": 0.029092358274384778, "grad_norm": 1.258313908870013, "learning_rate": 3.9964647996015745e-05, "loss": 1.0402, "step": 73 }, { "epoch": 0.029490883730198265, "grad_norm": 1.191082490937846, "learning_rate": 3.996312811911569e-05, "loss": 1.0405, "step": 74 }, { "epoch": 0.029889409186011757, "grad_norm": 1.1466323991622203, "learning_rate": 3.996157628456518e-05, "loss": 1.0211, "step": 75 }, { "epoch": 0.030287934641825248, "grad_norm": 1.528035955341438, "learning_rate": 3.9959992494848433e-05, "loss": 1.0462, "step": 76 }, { "epoch": 0.030686460097638736, "grad_norm": 1.324417703714102, "learning_rate": 3.995837675250084e-05, "loss": 1.0842, "step": 77 }, { "epoch": 0.031084985553452227, "grad_norm": 0.893828852913908, "learning_rate": 3.995672906010893e-05, "loss": 1.0135, "step": 78 }, { "epoch": 0.03148351100926572, "grad_norm": 1.5886985675595782, "learning_rate": 3.9955049420310386e-05, "loss": 0.985, "step": 79 }, { "epoch": 0.03188203646507921, "grad_norm": 0.8842933361031705, "learning_rate": 3.995333783579404e-05, "loss": 0.9826, "step": 80 }, { "epoch": 0.032280561920892695, "grad_norm": 1.2312713203427161, "learning_rate": 3.995159430929984e-05, "loss": 0.9933, "step": 81 }, { "epoch": 0.03267908737670619, "grad_norm": 0.968587201770918, "learning_rate": 3.99498188436189e-05, "loss": 1.0305, "step": 82 }, { "epoch": 0.03307761283251968, "grad_norm": 1.354446902187372, "learning_rate": 3.994801144159343e-05, "loss": 1.0015, "step": 83 }, { "epoch": 0.033476138288333165, "grad_norm": 0.9815177511320659, "learning_rate": 3.9946172106116786e-05, "loss": 1.0419, "step": 84 }, { "epoch": 0.03387466374414666, "grad_norm": 1.4163104282934211, "learning_rate": 3.994430084013345e-05, "loss": 1.0693, "step": 85 }, { "epoch": 0.03427318919996015, "grad_norm": 0.9575099047174793, "learning_rate": 3.994239764663898e-05, "loss": 1.0352, "step": 86 }, { "epoch": 0.034671714655773636, "grad_norm": 1.3071690946757393, "learning_rate": 3.99404625286801e-05, "loss": 0.9971, "step": 87 }, { "epoch": 0.03507024011158713, "grad_norm": 1.0094650013129123, "learning_rate": 3.993849548935459e-05, "loss": 1.0347, "step": 88 }, { "epoch": 0.03546876556740062, "grad_norm": 1.2105057500431875, "learning_rate": 3.993649653181138e-05, "loss": 1.0249, "step": 89 }, { "epoch": 0.03586729102321411, "grad_norm": 1.1494038215569387, "learning_rate": 3.9934465659250445e-05, "loss": 1.07, "step": 90 }, { "epoch": 0.036265816479027595, "grad_norm": 1.3619795030427553, "learning_rate": 3.993240287492288e-05, "loss": 0.9727, "step": 91 }, { "epoch": 0.03666434193484109, "grad_norm": 1.0095282991348078, "learning_rate": 3.993030818213087e-05, "loss": 1.0542, "step": 92 }, { "epoch": 0.03706286739065458, "grad_norm": 1.233700566815371, "learning_rate": 3.992818158422766e-05, "loss": 1.0034, "step": 93 }, { "epoch": 0.037461392846468065, "grad_norm": 1.0447313763347152, "learning_rate": 3.992602308461758e-05, "loss": 1.0058, "step": 94 }, { "epoch": 0.03785991830228156, "grad_norm": 1.0696169481085038, "learning_rate": 3.992383268675603e-05, "loss": 1.0478, "step": 95 }, { "epoch": 0.03825844375809505, "grad_norm": 1.3030274633669099, "learning_rate": 3.9921610394149484e-05, "loss": 0.9885, "step": 96 }, { "epoch": 0.038656969213908536, "grad_norm": 0.9547168721038842, "learning_rate": 3.991935621035545e-05, "loss": 1.0126, "step": 97 }, { "epoch": 0.03905549466972203, "grad_norm": 1.0282165364592126, "learning_rate": 3.9917070138982496e-05, "loss": 1.0352, "step": 98 }, { "epoch": 0.03945402012553552, "grad_norm": 1.4052288957523145, "learning_rate": 3.991475218369026e-05, "loss": 0.9908, "step": 99 }, { "epoch": 0.03985254558134901, "grad_norm": 0.883707027247818, "learning_rate": 3.99124023481894e-05, "loss": 1.0155, "step": 100 }, { "epoch": 0.0402510710371625, "grad_norm": 1.0103744787259499, "learning_rate": 3.991002063624159e-05, "loss": 1.0398, "step": 101 }, { "epoch": 0.04064959649297599, "grad_norm": 1.3196267795391554, "learning_rate": 3.9907607051659594e-05, "loss": 0.9986, "step": 102 }, { "epoch": 0.04104812194878948, "grad_norm": 0.9068591396167901, "learning_rate": 3.990516159830712e-05, "loss": 0.988, "step": 103 }, { "epoch": 0.04144664740460297, "grad_norm": 1.3332646337147993, "learning_rate": 3.9902684280098965e-05, "loss": 1.0022, "step": 104 }, { "epoch": 0.04184517286041646, "grad_norm": 1.0383165114992166, "learning_rate": 3.990017510100088e-05, "loss": 0.9767, "step": 105 }, { "epoch": 0.04224369831622995, "grad_norm": 1.0850955219468192, "learning_rate": 3.9897634065029656e-05, "loss": 1.0166, "step": 106 }, { "epoch": 0.042642223772043436, "grad_norm": 1.0137112717519785, "learning_rate": 3.989506117625306e-05, "loss": 1.0039, "step": 107 }, { "epoch": 0.04304074922785693, "grad_norm": 1.3161286100477132, "learning_rate": 3.989245643878987e-05, "loss": 1.031, "step": 108 }, { "epoch": 0.04343927468367042, "grad_norm": 0.9789302387291591, "learning_rate": 3.988981985680983e-05, "loss": 1.0007, "step": 109 }, { "epoch": 0.04383780013948391, "grad_norm": 1.367535024910473, "learning_rate": 3.9887151434533674e-05, "loss": 1.018, "step": 110 }, { "epoch": 0.0442363255952974, "grad_norm": 0.7004934620329838, "learning_rate": 3.988445117623311e-05, "loss": 0.9821, "step": 111 }, { "epoch": 0.04463485105111089, "grad_norm": 1.158874430209204, "learning_rate": 3.9881719086230786e-05, "loss": 0.9865, "step": 112 }, { "epoch": 0.04503337650692438, "grad_norm": 1.152431912909897, "learning_rate": 3.9878955168900334e-05, "loss": 0.9645, "step": 113 }, { "epoch": 0.04543190196273787, "grad_norm": 1.1079205102947556, "learning_rate": 3.987615942866632e-05, "loss": 0.9582, "step": 114 }, { "epoch": 0.04583042741855136, "grad_norm": 1.1791654374723093, "learning_rate": 3.987333187000427e-05, "loss": 1.0214, "step": 115 }, { "epoch": 0.04622895287436485, "grad_norm": 0.936906534851351, "learning_rate": 3.9870472497440624e-05, "loss": 1.0127, "step": 116 }, { "epoch": 0.04662747833017834, "grad_norm": 1.092836008794883, "learning_rate": 3.986758131555278e-05, "loss": 0.9664, "step": 117 }, { "epoch": 0.04702600378599183, "grad_norm": 1.094413912535255, "learning_rate": 3.986465832896902e-05, "loss": 0.9757, "step": 118 }, { "epoch": 0.04742452924180532, "grad_norm": 1.0623495271819532, "learning_rate": 3.986170354236856e-05, "loss": 0.9984, "step": 119 }, { "epoch": 0.047823054697618814, "grad_norm": 0.854179583596702, "learning_rate": 3.985871696048154e-05, "loss": 0.9864, "step": 120 }, { "epoch": 0.0482215801534323, "grad_norm": 1.0432520232855218, "learning_rate": 3.9855698588088965e-05, "loss": 0.9548, "step": 121 }, { "epoch": 0.04862010560924579, "grad_norm": 1.0755622132654334, "learning_rate": 3.9852648430022754e-05, "loss": 0.9485, "step": 122 }, { "epoch": 0.04901863106505928, "grad_norm": 1.2217694552157112, "learning_rate": 3.984956649116571e-05, "loss": 0.9855, "step": 123 }, { "epoch": 0.04941715652087277, "grad_norm": 1.0275276231271884, "learning_rate": 3.984645277645149e-05, "loss": 0.9964, "step": 124 }, { "epoch": 0.04981568197668626, "grad_norm": 1.1178940979524548, "learning_rate": 3.984330729086464e-05, "loss": 0.9497, "step": 125 }, { "epoch": 0.05021420743249975, "grad_norm": 0.741923762221831, "learning_rate": 3.984013003944056e-05, "loss": 1.0072, "step": 126 }, { "epoch": 0.05061273288831324, "grad_norm": 0.8682737579433879, "learning_rate": 3.983692102726551e-05, "loss": 1.0082, "step": 127 }, { "epoch": 0.05101125834412673, "grad_norm": 1.0434473812056535, "learning_rate": 3.983368025947657e-05, "loss": 0.9831, "step": 128 }, { "epoch": 0.05140978379994022, "grad_norm": 1.022692118220617, "learning_rate": 3.983040774126169e-05, "loss": 0.9566, "step": 129 }, { "epoch": 0.051808309255753714, "grad_norm": 1.2484490098325738, "learning_rate": 3.9827103477859605e-05, "loss": 1.0005, "step": 130 }, { "epoch": 0.0522068347115672, "grad_norm": 0.8271462851970588, "learning_rate": 3.9823767474559905e-05, "loss": 0.968, "step": 131 }, { "epoch": 0.05260536016738069, "grad_norm": 0.8519476486723382, "learning_rate": 3.982039973670298e-05, "loss": 0.9617, "step": 132 }, { "epoch": 0.053003885623194184, "grad_norm": 0.8333279737618872, "learning_rate": 3.9817000269680005e-05, "loss": 0.9757, "step": 133 }, { "epoch": 0.05340241107900767, "grad_norm": 0.8703944410797784, "learning_rate": 3.981356907893298e-05, "loss": 0.9917, "step": 134 }, { "epoch": 0.05380093653482116, "grad_norm": 0.9994780910035236, "learning_rate": 3.981010616995465e-05, "loss": 0.9603, "step": 135 }, { "epoch": 0.054199461990634655, "grad_norm": 1.1123731475641294, "learning_rate": 3.980661154828857e-05, "loss": 0.9695, "step": 136 }, { "epoch": 0.05459798744644814, "grad_norm": 0.9337508858933264, "learning_rate": 3.980308521952905e-05, "loss": 0.9786, "step": 137 }, { "epoch": 0.05499651290226163, "grad_norm": 0.8773514301553659, "learning_rate": 3.979952718932116e-05, "loss": 0.9829, "step": 138 }, { "epoch": 0.05539503835807512, "grad_norm": 0.8259379275752252, "learning_rate": 3.97959374633607e-05, "loss": 0.9731, "step": 139 }, { "epoch": 0.055793563813888614, "grad_norm": 0.9481177250720214, "learning_rate": 3.979231604739423e-05, "loss": 1.0004, "step": 140 }, { "epoch": 0.0561920892697021, "grad_norm": 1.0333391418969482, "learning_rate": 3.978866294721904e-05, "loss": 0.9685, "step": 141 }, { "epoch": 0.05659061472551559, "grad_norm": 0.9955889948584824, "learning_rate": 3.9784978168683134e-05, "loss": 0.9716, "step": 142 }, { "epoch": 0.056989140181329084, "grad_norm": 1.0603086583420307, "learning_rate": 3.978126171768523e-05, "loss": 0.9801, "step": 143 }, { "epoch": 0.05738766563714257, "grad_norm": 0.812587571522746, "learning_rate": 3.977751360017474e-05, "loss": 0.9595, "step": 144 }, { "epoch": 0.05778619109295606, "grad_norm": 0.7781386777987177, "learning_rate": 3.97737338221518e-05, "loss": 1.0095, "step": 145 }, { "epoch": 0.058184716548769555, "grad_norm": 0.9828802357688441, "learning_rate": 3.976992238966719e-05, "loss": 0.992, "step": 146 }, { "epoch": 0.05858324200458304, "grad_norm": 0.9416827586556631, "learning_rate": 3.976607930882238e-05, "loss": 0.9628, "step": 147 }, { "epoch": 0.05898176746039653, "grad_norm": 0.7650913970674944, "learning_rate": 3.97622045857695e-05, "loss": 0.9995, "step": 148 }, { "epoch": 0.059380292916210026, "grad_norm": 0.6668203189771907, "learning_rate": 3.9758298226711346e-05, "loss": 0.9709, "step": 149 }, { "epoch": 0.059778818372023514, "grad_norm": 0.9120833321517047, "learning_rate": 3.975436023790135e-05, "loss": 0.9644, "step": 150 }, { "epoch": 0.060177343827837, "grad_norm": 1.0907868368195024, "learning_rate": 3.975039062564357e-05, "loss": 0.9628, "step": 151 }, { "epoch": 0.060575869283650496, "grad_norm": 0.9368612099613929, "learning_rate": 3.9746389396292705e-05, "loss": 0.9937, "step": 152 }, { "epoch": 0.060974394739463984, "grad_norm": 0.9737465093992717, "learning_rate": 3.974235655625405e-05, "loss": 0.961, "step": 153 }, { "epoch": 0.06137292019527747, "grad_norm": 0.8996382068900802, "learning_rate": 3.973829211198352e-05, "loss": 0.9339, "step": 154 }, { "epoch": 0.06177144565109096, "grad_norm": 0.9165314697100433, "learning_rate": 3.973419606998761e-05, "loss": 0.9568, "step": 155 }, { "epoch": 0.062169971106904455, "grad_norm": 0.9274654639084001, "learning_rate": 3.9730068436823395e-05, "loss": 0.9389, "step": 156 }, { "epoch": 0.06256849656271794, "grad_norm": 0.8441046935557636, "learning_rate": 3.9725909219098546e-05, "loss": 0.9388, "step": 157 }, { "epoch": 0.06296702201853144, "grad_norm": 0.9902084616052694, "learning_rate": 3.972171842347127e-05, "loss": 0.9596, "step": 158 }, { "epoch": 0.06336554747434492, "grad_norm": 1.1115069818338272, "learning_rate": 3.9717496056650325e-05, "loss": 0.9421, "step": 159 }, { "epoch": 0.06376407293015841, "grad_norm": 0.9808461355374265, "learning_rate": 3.9713242125395035e-05, "loss": 0.9549, "step": 160 }, { "epoch": 0.06416259838597191, "grad_norm": 0.6838984370781541, "learning_rate": 3.970895663651523e-05, "loss": 0.9577, "step": 161 }, { "epoch": 0.06456112384178539, "grad_norm": 0.5849603441312805, "learning_rate": 3.970463959687127e-05, "loss": 0.9391, "step": 162 }, { "epoch": 0.06495964929759888, "grad_norm": 0.8012305866704266, "learning_rate": 3.9700291013374005e-05, "loss": 0.9749, "step": 163 }, { "epoch": 0.06535817475341238, "grad_norm": 0.9116141961043895, "learning_rate": 3.969591089298481e-05, "loss": 0.9734, "step": 164 }, { "epoch": 0.06575670020922586, "grad_norm": 0.7666536547751186, "learning_rate": 3.9691499242715524e-05, "loss": 0.9679, "step": 165 }, { "epoch": 0.06615522566503935, "grad_norm": 0.5587510714841003, "learning_rate": 3.968705606962847e-05, "loss": 0.9581, "step": 166 }, { "epoch": 0.06655375112085285, "grad_norm": 0.5276592494284221, "learning_rate": 3.9682581380836415e-05, "loss": 0.9171, "step": 167 }, { "epoch": 0.06695227657666633, "grad_norm": 0.7394645356756339, "learning_rate": 3.967807518350261e-05, "loss": 0.9612, "step": 168 }, { "epoch": 0.06735080203247983, "grad_norm": 1.1007193079182445, "learning_rate": 3.967353748484071e-05, "loss": 0.9118, "step": 169 }, { "epoch": 0.06774932748829332, "grad_norm": 1.0581797805010837, "learning_rate": 3.966896829211483e-05, "loss": 0.9641, "step": 170 }, { "epoch": 0.0681478529441068, "grad_norm": 0.8757602622657974, "learning_rate": 3.966436761263949e-05, "loss": 0.9566, "step": 171 }, { "epoch": 0.0685463783999203, "grad_norm": 0.8687270000650961, "learning_rate": 3.96597354537796e-05, "loss": 0.9701, "step": 172 }, { "epoch": 0.06894490385573379, "grad_norm": 1.0166656418615307, "learning_rate": 3.965507182295049e-05, "loss": 0.9564, "step": 173 }, { "epoch": 0.06934342931154727, "grad_norm": 0.8215033487256318, "learning_rate": 3.965037672761785e-05, "loss": 1.0189, "step": 174 }, { "epoch": 0.06974195476736077, "grad_norm": 0.7260355443552792, "learning_rate": 3.964565017529775e-05, "loss": 0.9431, "step": 175 }, { "epoch": 0.07014048022317426, "grad_norm": 0.7653437077317252, "learning_rate": 3.9640892173556624e-05, "loss": 0.947, "step": 176 }, { "epoch": 0.07053900567898774, "grad_norm": 0.9116401355112523, "learning_rate": 3.963610273001122e-05, "loss": 0.9472, "step": 177 }, { "epoch": 0.07093753113480124, "grad_norm": 0.9609189669126867, "learning_rate": 3.963128185232866e-05, "loss": 0.9427, "step": 178 }, { "epoch": 0.07133605659061472, "grad_norm": 0.8565841157727021, "learning_rate": 3.9626429548226364e-05, "loss": 0.9477, "step": 179 }, { "epoch": 0.07173458204642821, "grad_norm": 0.7814839364600451, "learning_rate": 3.962154582547205e-05, "loss": 0.9094, "step": 180 }, { "epoch": 0.07213310750224171, "grad_norm": 0.7824911161278741, "learning_rate": 3.961663069188377e-05, "loss": 0.9647, "step": 181 }, { "epoch": 0.07253163295805519, "grad_norm": 0.8488502117489565, "learning_rate": 3.9611684155329825e-05, "loss": 0.9634, "step": 182 }, { "epoch": 0.07293015841386868, "grad_norm": 0.8663407155900105, "learning_rate": 3.9606706223728796e-05, "loss": 0.9522, "step": 183 }, { "epoch": 0.07332868386968218, "grad_norm": 0.8427930838971712, "learning_rate": 3.960169690504952e-05, "loss": 0.957, "step": 184 }, { "epoch": 0.07372720932549566, "grad_norm": 0.8728940813219989, "learning_rate": 3.9596656207311096e-05, "loss": 0.9103, "step": 185 }, { "epoch": 0.07412573478130915, "grad_norm": 0.8964681349142457, "learning_rate": 3.9591584138582835e-05, "loss": 0.9783, "step": 186 }, { "epoch": 0.07452426023712265, "grad_norm": 0.747475640936641, "learning_rate": 3.958648070698428e-05, "loss": 0.9343, "step": 187 }, { "epoch": 0.07492278569293613, "grad_norm": 0.6081767246649388, "learning_rate": 3.9581345920685176e-05, "loss": 0.9426, "step": 188 }, { "epoch": 0.07532131114874963, "grad_norm": 0.646327313636509, "learning_rate": 3.957617978790546e-05, "loss": 0.936, "step": 189 }, { "epoch": 0.07571983660456312, "grad_norm": 0.5762067425821266, "learning_rate": 3.9570982316915245e-05, "loss": 0.9869, "step": 190 }, { "epoch": 0.0761183620603766, "grad_norm": 0.5277633100224635, "learning_rate": 3.956575351603484e-05, "loss": 0.9247, "step": 191 }, { "epoch": 0.0765168875161901, "grad_norm": 0.6079283681455546, "learning_rate": 3.9560493393634665e-05, "loss": 0.9003, "step": 192 }, { "epoch": 0.07691541297200359, "grad_norm": 0.6485268072649816, "learning_rate": 3.955520195813531e-05, "loss": 0.9428, "step": 193 }, { "epoch": 0.07731393842781707, "grad_norm": 0.6753541169437033, "learning_rate": 3.954987921800749e-05, "loss": 0.9546, "step": 194 }, { "epoch": 0.07771246388363057, "grad_norm": 0.6320121035158947, "learning_rate": 3.954452518177201e-05, "loss": 0.9425, "step": 195 }, { "epoch": 0.07811098933944406, "grad_norm": 0.8024382967580528, "learning_rate": 3.953913985799982e-05, "loss": 0.9575, "step": 196 }, { "epoch": 0.07850951479525754, "grad_norm": 0.6451828329766384, "learning_rate": 3.95337232553119e-05, "loss": 0.9618, "step": 197 }, { "epoch": 0.07890804025107104, "grad_norm": 0.5637480570882453, "learning_rate": 3.952827538237934e-05, "loss": 0.9436, "step": 198 }, { "epoch": 0.07930656570688453, "grad_norm": 0.6287403860445728, "learning_rate": 3.952279624792329e-05, "loss": 0.9585, "step": 199 }, { "epoch": 0.07970509116269801, "grad_norm": 0.6133071011985074, "learning_rate": 3.9517285860714915e-05, "loss": 0.9447, "step": 200 }, { "epoch": 0.08010361661851151, "grad_norm": 0.5782665343325509, "learning_rate": 3.951174422957545e-05, "loss": 0.9381, "step": 201 }, { "epoch": 0.080502142074325, "grad_norm": 0.5255985375741193, "learning_rate": 3.950617136337611e-05, "loss": 0.893, "step": 202 }, { "epoch": 0.08090066753013848, "grad_norm": 0.5926087052436324, "learning_rate": 3.950056727103813e-05, "loss": 0.9226, "step": 203 }, { "epoch": 0.08129919298595198, "grad_norm": 0.6283429524618049, "learning_rate": 3.949493196153274e-05, "loss": 0.9381, "step": 204 }, { "epoch": 0.08169771844176547, "grad_norm": 0.6457268317630597, "learning_rate": 3.948926544388112e-05, "loss": 0.9097, "step": 205 }, { "epoch": 0.08209624389757895, "grad_norm": 0.8396169584539872, "learning_rate": 3.948356772715443e-05, "loss": 0.9303, "step": 206 }, { "epoch": 0.08249476935339245, "grad_norm": 0.9970461466822023, "learning_rate": 3.9477838820473776e-05, "loss": 0.9218, "step": 207 }, { "epoch": 0.08289329480920594, "grad_norm": 1.1370242066432408, "learning_rate": 3.9472078733010174e-05, "loss": 0.9393, "step": 208 }, { "epoch": 0.08329182026501943, "grad_norm": 0.8481740560416752, "learning_rate": 3.946628747398457e-05, "loss": 0.9539, "step": 209 }, { "epoch": 0.08369034572083292, "grad_norm": 0.7749044455116462, "learning_rate": 3.94604650526678e-05, "loss": 0.9064, "step": 210 }, { "epoch": 0.0840888711766464, "grad_norm": 0.8242769108366514, "learning_rate": 3.9454611478380604e-05, "loss": 0.9578, "step": 211 }, { "epoch": 0.0844873966324599, "grad_norm": 0.7060014980899263, "learning_rate": 3.944872676049358e-05, "loss": 0.9586, "step": 212 }, { "epoch": 0.08488592208827339, "grad_norm": 0.8645214673367116, "learning_rate": 3.944281090842718e-05, "loss": 0.919, "step": 213 }, { "epoch": 0.08528444754408687, "grad_norm": 1.0934973623844684, "learning_rate": 3.943686393165171e-05, "loss": 0.955, "step": 214 }, { "epoch": 0.08568297299990037, "grad_norm": 0.8673963340448777, "learning_rate": 3.943088583968726e-05, "loss": 0.9304, "step": 215 }, { "epoch": 0.08608149845571386, "grad_norm": 0.835352668198479, "learning_rate": 3.9424876642103805e-05, "loss": 0.9615, "step": 216 }, { "epoch": 0.08648002391152734, "grad_norm": 0.8611507271565368, "learning_rate": 3.9418836348521045e-05, "loss": 0.929, "step": 217 }, { "epoch": 0.08687854936734084, "grad_norm": 0.8251142229076397, "learning_rate": 3.941276496860849e-05, "loss": 0.9642, "step": 218 }, { "epoch": 0.08727707482315433, "grad_norm": 0.7930096914994095, "learning_rate": 3.9406662512085416e-05, "loss": 0.9622, "step": 219 }, { "epoch": 0.08767560027896781, "grad_norm": 0.6629634789706741, "learning_rate": 3.940052898872084e-05, "loss": 0.9083, "step": 220 }, { "epoch": 0.08807412573478131, "grad_norm": 0.6439473882747895, "learning_rate": 3.93943644083335e-05, "loss": 0.9155, "step": 221 }, { "epoch": 0.0884726511905948, "grad_norm": 0.7838839076395734, "learning_rate": 3.9388168780791883e-05, "loss": 0.9127, "step": 222 }, { "epoch": 0.08887117664640828, "grad_norm": 0.7675321153839495, "learning_rate": 3.938194211601416e-05, "loss": 0.9313, "step": 223 }, { "epoch": 0.08926970210222178, "grad_norm": 0.670858178864275, "learning_rate": 3.937568442396817e-05, "loss": 0.9215, "step": 224 }, { "epoch": 0.08966822755803527, "grad_norm": 0.5430100456071535, "learning_rate": 3.936939571467145e-05, "loss": 0.9215, "step": 225 }, { "epoch": 0.09006675301384875, "grad_norm": 0.645122412385762, "learning_rate": 3.9363075998191175e-05, "loss": 0.9518, "step": 226 }, { "epoch": 0.09046527846966225, "grad_norm": 0.7124302784985599, "learning_rate": 3.935672528464416e-05, "loss": 0.9472, "step": 227 }, { "epoch": 0.09086380392547574, "grad_norm": 0.6944932728108557, "learning_rate": 3.935034358419684e-05, "loss": 0.9043, "step": 228 }, { "epoch": 0.09126232938128923, "grad_norm": 0.7428731739366404, "learning_rate": 3.934393090706527e-05, "loss": 0.9276, "step": 229 }, { "epoch": 0.09166085483710272, "grad_norm": 0.7237371542570604, "learning_rate": 3.9337487263515065e-05, "loss": 0.966, "step": 230 }, { "epoch": 0.09205938029291622, "grad_norm": 0.7584658608788947, "learning_rate": 3.9331012663861435e-05, "loss": 0.9195, "step": 231 }, { "epoch": 0.0924579057487297, "grad_norm": 0.8151922759638645, "learning_rate": 3.932450711846914e-05, "loss": 0.9352, "step": 232 }, { "epoch": 0.09285643120454319, "grad_norm": 0.7799720068156271, "learning_rate": 3.931797063775246e-05, "loss": 0.867, "step": 233 }, { "epoch": 0.09325495666035669, "grad_norm": 0.7195572843892059, "learning_rate": 3.931140323217524e-05, "loss": 0.9485, "step": 234 }, { "epoch": 0.09365348211617017, "grad_norm": 0.5676394070871306, "learning_rate": 3.9304804912250785e-05, "loss": 0.9479, "step": 235 }, { "epoch": 0.09405200757198366, "grad_norm": 0.603554245394414, "learning_rate": 3.9298175688541916e-05, "loss": 0.8831, "step": 236 }, { "epoch": 0.09445053302779716, "grad_norm": 0.7416220601956737, "learning_rate": 3.9291515571660926e-05, "loss": 0.9537, "step": 237 }, { "epoch": 0.09484905848361064, "grad_norm": 0.7400965861280613, "learning_rate": 3.928482457226954e-05, "loss": 0.9087, "step": 238 }, { "epoch": 0.09524758393942413, "grad_norm": 0.8210302591504622, "learning_rate": 3.927810270107894e-05, "loss": 0.8909, "step": 239 }, { "epoch": 0.09564610939523763, "grad_norm": 0.7137333890568919, "learning_rate": 3.9271349968849735e-05, "loss": 0.9301, "step": 240 }, { "epoch": 0.09604463485105111, "grad_norm": 0.5314296904513427, "learning_rate": 3.9264566386391925e-05, "loss": 0.9233, "step": 241 }, { "epoch": 0.0964431603068646, "grad_norm": 0.6166230859092278, "learning_rate": 3.925775196456488e-05, "loss": 0.8958, "step": 242 }, { "epoch": 0.09684168576267808, "grad_norm": 0.6958069670048053, "learning_rate": 3.925090671427739e-05, "loss": 0.9278, "step": 243 }, { "epoch": 0.09724021121849158, "grad_norm": 0.6889489906309647, "learning_rate": 3.9244030646487524e-05, "loss": 0.9453, "step": 244 }, { "epoch": 0.09763873667430507, "grad_norm": 0.6113796976521826, "learning_rate": 3.923712377220275e-05, "loss": 0.9042, "step": 245 }, { "epoch": 0.09803726213011855, "grad_norm": 0.5576650794524141, "learning_rate": 3.9230186102479824e-05, "loss": 0.9457, "step": 246 }, { "epoch": 0.09843578758593205, "grad_norm": 0.5050600559673174, "learning_rate": 3.922321764842479e-05, "loss": 0.9128, "step": 247 }, { "epoch": 0.09883431304174554, "grad_norm": 0.5792206556379802, "learning_rate": 3.9216218421193e-05, "loss": 0.9346, "step": 248 }, { "epoch": 0.09923283849755903, "grad_norm": 0.7117260079905121, "learning_rate": 3.9209188431989044e-05, "loss": 0.9242, "step": 249 }, { "epoch": 0.09963136395337252, "grad_norm": 0.5411445590412157, "learning_rate": 3.920212769206676e-05, "loss": 0.8808, "step": 250 }, { "epoch": 0.10002988940918602, "grad_norm": 0.6383206470777513, "learning_rate": 3.919503621272924e-05, "loss": 0.9014, "step": 251 }, { "epoch": 0.1004284148649995, "grad_norm": 0.5870726238645826, "learning_rate": 3.918791400532874e-05, "loss": 0.8833, "step": 252 }, { "epoch": 0.10082694032081299, "grad_norm": 0.5677734635394229, "learning_rate": 3.918076108126675e-05, "loss": 0.9128, "step": 253 }, { "epoch": 0.10122546577662649, "grad_norm": 0.6397706154970396, "learning_rate": 3.91735774519939e-05, "loss": 0.8892, "step": 254 }, { "epoch": 0.10162399123243997, "grad_norm": 0.6025324225700743, "learning_rate": 3.916636312900999e-05, "loss": 0.8924, "step": 255 }, { "epoch": 0.10202251668825346, "grad_norm": 0.5993884554898958, "learning_rate": 3.9159118123863964e-05, "loss": 0.9249, "step": 256 }, { "epoch": 0.10242104214406696, "grad_norm": 0.5139396732603375, "learning_rate": 3.915184244815385e-05, "loss": 0.8977, "step": 257 }, { "epoch": 0.10281956759988044, "grad_norm": 0.582154606226688, "learning_rate": 3.9144536113526806e-05, "loss": 0.9064, "step": 258 }, { "epoch": 0.10321809305569393, "grad_norm": 0.5737111995658692, "learning_rate": 3.9137199131679064e-05, "loss": 0.9003, "step": 259 }, { "epoch": 0.10361661851150743, "grad_norm": 0.5501293796446101, "learning_rate": 3.912983151435591e-05, "loss": 0.9053, "step": 260 }, { "epoch": 0.10401514396732091, "grad_norm": 0.5616191359055138, "learning_rate": 3.912243327335167e-05, "loss": 0.9059, "step": 261 }, { "epoch": 0.1044136694231344, "grad_norm": 0.5779090114992178, "learning_rate": 3.91150044205097e-05, "loss": 0.9215, "step": 262 }, { "epoch": 0.1048121948789479, "grad_norm": 0.5621858273638006, "learning_rate": 3.910754496772236e-05, "loss": 0.9231, "step": 263 }, { "epoch": 0.10521072033476138, "grad_norm": 0.5187195624713219, "learning_rate": 3.9100054926931e-05, "loss": 0.9077, "step": 264 }, { "epoch": 0.10560924579057487, "grad_norm": 0.5484074323672972, "learning_rate": 3.909253431012592e-05, "loss": 0.8943, "step": 265 }, { "epoch": 0.10600777124638837, "grad_norm": 0.5860726206207597, "learning_rate": 3.9084983129346386e-05, "loss": 0.9215, "step": 266 }, { "epoch": 0.10640629670220185, "grad_norm": 0.5785145319929371, "learning_rate": 3.907740139668058e-05, "loss": 0.9079, "step": 267 }, { "epoch": 0.10680482215801534, "grad_norm": 0.6293154987830761, "learning_rate": 3.9069789124265595e-05, "loss": 0.9199, "step": 268 }, { "epoch": 0.10720334761382884, "grad_norm": 0.6138996610001156, "learning_rate": 3.906214632428742e-05, "loss": 0.9307, "step": 269 }, { "epoch": 0.10760187306964232, "grad_norm": 0.5574707399267468, "learning_rate": 3.90544730089809e-05, "loss": 0.9235, "step": 270 }, { "epoch": 0.10800039852545582, "grad_norm": 0.49410092240642955, "learning_rate": 3.904676919062973e-05, "loss": 0.8892, "step": 271 }, { "epoch": 0.10839892398126931, "grad_norm": 0.551637520171974, "learning_rate": 3.903903488156646e-05, "loss": 0.9133, "step": 272 }, { "epoch": 0.10879744943708279, "grad_norm": 0.5731759290280689, "learning_rate": 3.903127009417244e-05, "loss": 0.8961, "step": 273 }, { "epoch": 0.10919597489289629, "grad_norm": 0.5762364732869328, "learning_rate": 3.9023474840877775e-05, "loss": 0.8803, "step": 274 }, { "epoch": 0.10959450034870978, "grad_norm": 0.6947758285401612, "learning_rate": 3.901564913416139e-05, "loss": 0.8906, "step": 275 }, { "epoch": 0.10999302580452326, "grad_norm": 0.7885605570685301, "learning_rate": 3.9007792986550937e-05, "loss": 0.9016, "step": 276 }, { "epoch": 0.11039155126033676, "grad_norm": 0.779205530434434, "learning_rate": 3.8999906410622805e-05, "loss": 0.909, "step": 277 }, { "epoch": 0.11079007671615024, "grad_norm": 0.9502303024617071, "learning_rate": 3.899198941900209e-05, "loss": 0.8972, "step": 278 }, { "epoch": 0.11118860217196373, "grad_norm": 0.7020948558600761, "learning_rate": 3.898404202436258e-05, "loss": 0.8992, "step": 279 }, { "epoch": 0.11158712762777723, "grad_norm": 0.6253004452655916, "learning_rate": 3.8976064239426727e-05, "loss": 0.8983, "step": 280 }, { "epoch": 0.11198565308359071, "grad_norm": 0.48947446603739525, "learning_rate": 3.896805607696565e-05, "loss": 0.9092, "step": 281 }, { "epoch": 0.1123841785394042, "grad_norm": 0.5305885289397677, "learning_rate": 3.896001754979908e-05, "loss": 0.8828, "step": 282 }, { "epoch": 0.1127827039952177, "grad_norm": 0.6511594701603155, "learning_rate": 3.8951948670795356e-05, "loss": 0.8949, "step": 283 }, { "epoch": 0.11318122945103118, "grad_norm": 0.7838264076235747, "learning_rate": 3.8943849452871416e-05, "loss": 0.9061, "step": 284 }, { "epoch": 0.11357975490684467, "grad_norm": 0.8176144028366352, "learning_rate": 3.8935719908992776e-05, "loss": 0.9139, "step": 285 }, { "epoch": 0.11397828036265817, "grad_norm": 0.7483860858548197, "learning_rate": 3.892756005217347e-05, "loss": 0.9092, "step": 286 }, { "epoch": 0.11437680581847165, "grad_norm": 0.6145924322571729, "learning_rate": 3.891936989547608e-05, "loss": 0.9052, "step": 287 }, { "epoch": 0.11477533127428514, "grad_norm": 0.5793992708257767, "learning_rate": 3.891114945201168e-05, "loss": 0.9041, "step": 288 }, { "epoch": 0.11517385673009864, "grad_norm": 0.6386399436855802, "learning_rate": 3.890289873493984e-05, "loss": 0.8765, "step": 289 }, { "epoch": 0.11557238218591212, "grad_norm": 0.7545452332949172, "learning_rate": 3.889461775746858e-05, "loss": 0.9407, "step": 290 }, { "epoch": 0.11597090764172562, "grad_norm": 0.643068181670375, "learning_rate": 3.888630653285437e-05, "loss": 0.9044, "step": 291 }, { "epoch": 0.11636943309753911, "grad_norm": 0.4963770968380342, "learning_rate": 3.887796507440211e-05, "loss": 0.9244, "step": 292 }, { "epoch": 0.11676795855335259, "grad_norm": 0.5330885480112182, "learning_rate": 3.8869593395465066e-05, "loss": 0.9007, "step": 293 }, { "epoch": 0.11716648400916609, "grad_norm": 0.6867642996793515, "learning_rate": 3.8861191509444926e-05, "loss": 0.8923, "step": 294 }, { "epoch": 0.11756500946497958, "grad_norm": 0.5931835622625073, "learning_rate": 3.88527594297917e-05, "loss": 0.9172, "step": 295 }, { "epoch": 0.11796353492079306, "grad_norm": 0.6693705563895682, "learning_rate": 3.884429717000376e-05, "loss": 0.8941, "step": 296 }, { "epoch": 0.11836206037660656, "grad_norm": 0.6616211136884201, "learning_rate": 3.883580474362777e-05, "loss": 0.9208, "step": 297 }, { "epoch": 0.11876058583242005, "grad_norm": 0.5241813662858397, "learning_rate": 3.88272821642587e-05, "loss": 0.9295, "step": 298 }, { "epoch": 0.11915911128823353, "grad_norm": 0.47998991090285037, "learning_rate": 3.8818729445539765e-05, "loss": 0.9134, "step": 299 }, { "epoch": 0.11955763674404703, "grad_norm": 0.5575410784453981, "learning_rate": 3.881014660116246e-05, "loss": 0.9264, "step": 300 }, { "epoch": 0.11995616219986052, "grad_norm": 0.5762249128335137, "learning_rate": 3.880153364486649e-05, "loss": 0.8924, "step": 301 }, { "epoch": 0.120354687655674, "grad_norm": 0.9257335770621549, "learning_rate": 3.8792890590439764e-05, "loss": 0.8861, "step": 302 }, { "epoch": 0.1207532131114875, "grad_norm": 0.5676730409091856, "learning_rate": 3.878421745171839e-05, "loss": 0.9112, "step": 303 }, { "epoch": 0.12115173856730099, "grad_norm": 0.4637670476081397, "learning_rate": 3.87755142425866e-05, "loss": 0.8917, "step": 304 }, { "epoch": 0.12155026402311447, "grad_norm": 0.5310661309184922, "learning_rate": 3.8766780976976795e-05, "loss": 0.9182, "step": 305 }, { "epoch": 0.12194878947892797, "grad_norm": 0.5584733508565086, "learning_rate": 3.8758017668869484e-05, "loss": 0.9396, "step": 306 }, { "epoch": 0.12234731493474146, "grad_norm": 0.5545890950572487, "learning_rate": 3.8749224332293265e-05, "loss": 0.9016, "step": 307 }, { "epoch": 0.12274584039055494, "grad_norm": 0.5692405766886073, "learning_rate": 3.874040098132481e-05, "loss": 0.8543, "step": 308 }, { "epoch": 0.12314436584636844, "grad_norm": 0.5829038395471384, "learning_rate": 3.873154763008884e-05, "loss": 0.8766, "step": 309 }, { "epoch": 0.12354289130218192, "grad_norm": 0.6399720498446062, "learning_rate": 3.872266429275809e-05, "loss": 0.8924, "step": 310 }, { "epoch": 0.12394141675799542, "grad_norm": 0.5563668304631704, "learning_rate": 3.871375098355331e-05, "loss": 0.9351, "step": 311 }, { "epoch": 0.12433994221380891, "grad_norm": 0.4891838671794899, "learning_rate": 3.8704807716743235e-05, "loss": 0.9084, "step": 312 }, { "epoch": 0.12473846766962239, "grad_norm": 0.5390514488310643, "learning_rate": 3.869583450664454e-05, "loss": 0.9006, "step": 313 }, { "epoch": 0.12513699312543589, "grad_norm": 0.6535963479715494, "learning_rate": 3.868683136762185e-05, "loss": 0.8946, "step": 314 }, { "epoch": 0.12553551858124937, "grad_norm": 0.6360077741778746, "learning_rate": 3.867779831408768e-05, "loss": 0.8997, "step": 315 }, { "epoch": 0.12593404403706288, "grad_norm": 0.47594974316839744, "learning_rate": 3.8668735360502474e-05, "loss": 0.9135, "step": 316 }, { "epoch": 0.12633256949287636, "grad_norm": 0.554635644525251, "learning_rate": 3.865964252137449e-05, "loss": 0.9056, "step": 317 }, { "epoch": 0.12673109494868984, "grad_norm": 0.6542560775862073, "learning_rate": 3.8650519811259856e-05, "loss": 0.8837, "step": 318 }, { "epoch": 0.12712962040450335, "grad_norm": 0.5504279116926618, "learning_rate": 3.864136724476252e-05, "loss": 0.909, "step": 319 }, { "epoch": 0.12752814586031683, "grad_norm": 0.4207363922400064, "learning_rate": 3.863218483653423e-05, "loss": 0.9199, "step": 320 }, { "epoch": 0.1279266713161303, "grad_norm": 0.5480250503031011, "learning_rate": 3.862297260127447e-05, "loss": 0.9115, "step": 321 }, { "epoch": 0.12832519677194382, "grad_norm": 0.7116612376007252, "learning_rate": 3.8613730553730525e-05, "loss": 0.902, "step": 322 }, { "epoch": 0.1287237222277573, "grad_norm": 0.7034441679085705, "learning_rate": 3.8604458708697354e-05, "loss": 0.93, "step": 323 }, { "epoch": 0.12912224768357078, "grad_norm": 0.6464461922880574, "learning_rate": 3.859515708101766e-05, "loss": 0.9027, "step": 324 }, { "epoch": 0.1295207731393843, "grad_norm": 0.5724183071806952, "learning_rate": 3.858582568558179e-05, "loss": 0.9152, "step": 325 }, { "epoch": 0.12991929859519777, "grad_norm": 0.5434975703367534, "learning_rate": 3.857646453732776e-05, "loss": 0.8873, "step": 326 }, { "epoch": 0.13031782405101125, "grad_norm": 0.5134121010042222, "learning_rate": 3.856707365124122e-05, "loss": 0.8728, "step": 327 }, { "epoch": 0.13071634950682476, "grad_norm": 0.5097236839503941, "learning_rate": 3.85576530423554e-05, "loss": 0.911, "step": 328 }, { "epoch": 0.13111487496263824, "grad_norm": 0.5227325664183777, "learning_rate": 3.854820272575115e-05, "loss": 0.8658, "step": 329 }, { "epoch": 0.13151340041845172, "grad_norm": 0.6322853032653781, "learning_rate": 3.853872271655685e-05, "loss": 0.891, "step": 330 }, { "epoch": 0.13191192587426523, "grad_norm": 0.5184506986493536, "learning_rate": 3.852921302994841e-05, "loss": 0.8612, "step": 331 }, { "epoch": 0.1323104513300787, "grad_norm": 0.5046807022502423, "learning_rate": 3.8519673681149265e-05, "loss": 0.8994, "step": 332 }, { "epoch": 0.1327089767858922, "grad_norm": 0.5061850051002039, "learning_rate": 3.851010468543033e-05, "loss": 0.8849, "step": 333 }, { "epoch": 0.1331075022417057, "grad_norm": 0.4935717896499033, "learning_rate": 3.850050605810997e-05, "loss": 0.9285, "step": 334 }, { "epoch": 0.13350602769751918, "grad_norm": 0.4947315091214366, "learning_rate": 3.8490877814553996e-05, "loss": 0.9004, "step": 335 }, { "epoch": 0.13390455315333266, "grad_norm": 0.46140205389577676, "learning_rate": 3.848121997017563e-05, "loss": 0.9065, "step": 336 }, { "epoch": 0.13430307860914617, "grad_norm": 0.47248289695698514, "learning_rate": 3.847153254043547e-05, "loss": 0.8805, "step": 337 }, { "epoch": 0.13470160406495965, "grad_norm": 0.45224697013215626, "learning_rate": 3.846181554084147e-05, "loss": 0.896, "step": 338 }, { "epoch": 0.13510012952077313, "grad_norm": 0.527417114425614, "learning_rate": 3.8452068986948956e-05, "loss": 0.9383, "step": 339 }, { "epoch": 0.13549865497658664, "grad_norm": 0.5092127958405034, "learning_rate": 3.844229289436053e-05, "loss": 0.8961, "step": 340 }, { "epoch": 0.13589718043240012, "grad_norm": 0.4746200986505316, "learning_rate": 3.8432487278726084e-05, "loss": 0.9281, "step": 341 }, { "epoch": 0.1362957058882136, "grad_norm": 0.484617132707988, "learning_rate": 3.842265215574279e-05, "loss": 0.8799, "step": 342 }, { "epoch": 0.1366942313440271, "grad_norm": 0.472139637172473, "learning_rate": 3.8412787541155035e-05, "loss": 0.8571, "step": 343 }, { "epoch": 0.1370927567998406, "grad_norm": 0.4750954980383929, "learning_rate": 3.840289345075444e-05, "loss": 0.8997, "step": 344 }, { "epoch": 0.13749128225565407, "grad_norm": 0.5058566298011136, "learning_rate": 3.839296990037979e-05, "loss": 0.8947, "step": 345 }, { "epoch": 0.13788980771146758, "grad_norm": 0.5034036144166951, "learning_rate": 3.838301690591704e-05, "loss": 0.856, "step": 346 }, { "epoch": 0.13828833316728106, "grad_norm": 0.5109042435371637, "learning_rate": 3.8373034483299286e-05, "loss": 0.8676, "step": 347 }, { "epoch": 0.13868685862309454, "grad_norm": 0.5076861609812875, "learning_rate": 3.836302264850673e-05, "loss": 0.8899, "step": 348 }, { "epoch": 0.13908538407890805, "grad_norm": 0.48688791345770777, "learning_rate": 3.835298141756664e-05, "loss": 0.8952, "step": 349 }, { "epoch": 0.13948390953472153, "grad_norm": 0.4294678692671596, "learning_rate": 3.8342910806553374e-05, "loss": 0.896, "step": 350 }, { "epoch": 0.13988243499053502, "grad_norm": 0.4759618640018106, "learning_rate": 3.83328108315883e-05, "loss": 0.8925, "step": 351 }, { "epoch": 0.14028096044634852, "grad_norm": 0.498083239156812, "learning_rate": 3.8322681508839796e-05, "loss": 0.897, "step": 352 }, { "epoch": 0.140679485902162, "grad_norm": 0.47774282716676997, "learning_rate": 3.8312522854523236e-05, "loss": 0.853, "step": 353 }, { "epoch": 0.14107801135797549, "grad_norm": 0.5425614790073936, "learning_rate": 3.830233488490092e-05, "loss": 0.9072, "step": 354 }, { "epoch": 0.141476536813789, "grad_norm": 0.607352655774501, "learning_rate": 3.8292117616282116e-05, "loss": 0.8849, "step": 355 }, { "epoch": 0.14187506226960248, "grad_norm": 0.622366562638722, "learning_rate": 3.828187106502295e-05, "loss": 0.8743, "step": 356 }, { "epoch": 0.14227358772541596, "grad_norm": 0.6880401152515128, "learning_rate": 3.827159524752646e-05, "loss": 0.854, "step": 357 }, { "epoch": 0.14267211318122944, "grad_norm": 0.6320544909726663, "learning_rate": 3.8261290180242524e-05, "loss": 0.8823, "step": 358 }, { "epoch": 0.14307063863704295, "grad_norm": 0.6117634467858145, "learning_rate": 3.825095587966784e-05, "loss": 0.8821, "step": 359 }, { "epoch": 0.14346916409285643, "grad_norm": 0.5586681204591263, "learning_rate": 3.82405923623459e-05, "loss": 0.8851, "step": 360 }, { "epoch": 0.1438676895486699, "grad_norm": 0.568103604064326, "learning_rate": 3.823019964486698e-05, "loss": 0.8963, "step": 361 }, { "epoch": 0.14426621500448342, "grad_norm": 0.5481484665397642, "learning_rate": 3.8219777743868095e-05, "loss": 0.8847, "step": 362 }, { "epoch": 0.1446647404602969, "grad_norm": 0.5839213790650319, "learning_rate": 3.820932667603297e-05, "loss": 0.8858, "step": 363 }, { "epoch": 0.14506326591611038, "grad_norm": 0.6803626614692434, "learning_rate": 3.819884645809203e-05, "loss": 0.9316, "step": 364 }, { "epoch": 0.1454617913719239, "grad_norm": 0.5826226983177064, "learning_rate": 3.8188337106822364e-05, "loss": 0.8926, "step": 365 }, { "epoch": 0.14586031682773737, "grad_norm": 0.4587053421690505, "learning_rate": 3.8177798639047693e-05, "loss": 0.9015, "step": 366 }, { "epoch": 0.14625884228355085, "grad_norm": 0.4979532996043012, "learning_rate": 3.8167231071638355e-05, "loss": 0.9084, "step": 367 }, { "epoch": 0.14665736773936436, "grad_norm": 0.6060462788501415, "learning_rate": 3.815663442151127e-05, "loss": 0.8913, "step": 368 }, { "epoch": 0.14705589319517784, "grad_norm": 0.5719962639011669, "learning_rate": 3.8146008705629916e-05, "loss": 0.9119, "step": 369 }, { "epoch": 0.14745441865099132, "grad_norm": 0.49076638405233397, "learning_rate": 3.813535394100429e-05, "loss": 0.8802, "step": 370 }, { "epoch": 0.14785294410680483, "grad_norm": 0.49594758931441285, "learning_rate": 3.81246701446909e-05, "loss": 0.8639, "step": 371 }, { "epoch": 0.1482514695626183, "grad_norm": 0.5940377132680764, "learning_rate": 3.8113957333792744e-05, "loss": 0.87, "step": 372 }, { "epoch": 0.1486499950184318, "grad_norm": 0.5596407953869648, "learning_rate": 3.810321552545924e-05, "loss": 0.8875, "step": 373 }, { "epoch": 0.1490485204742453, "grad_norm": 0.5587229850427988, "learning_rate": 3.8092444736886235e-05, "loss": 0.8823, "step": 374 }, { "epoch": 0.14944704593005878, "grad_norm": 0.6185912922060778, "learning_rate": 3.808164498531598e-05, "loss": 0.8736, "step": 375 }, { "epoch": 0.14984557138587226, "grad_norm": 0.5707944153693156, "learning_rate": 3.8070816288037076e-05, "loss": 0.9053, "step": 376 }, { "epoch": 0.15024409684168577, "grad_norm": 0.5131528156556673, "learning_rate": 3.805995866238446e-05, "loss": 0.9038, "step": 377 }, { "epoch": 0.15064262229749925, "grad_norm": 0.5289298616408312, "learning_rate": 3.804907212573941e-05, "loss": 0.9067, "step": 378 }, { "epoch": 0.15104114775331273, "grad_norm": 0.5460088042514601, "learning_rate": 3.803815669552944e-05, "loss": 0.8742, "step": 379 }, { "epoch": 0.15143967320912624, "grad_norm": 0.5901247804029622, "learning_rate": 3.802721238922835e-05, "loss": 0.8788, "step": 380 }, { "epoch": 0.15183819866493972, "grad_norm": 0.43400747036846915, "learning_rate": 3.801623922435615e-05, "loss": 0.8676, "step": 381 }, { "epoch": 0.1522367241207532, "grad_norm": 0.580607227815199, "learning_rate": 3.800523721847906e-05, "loss": 0.9247, "step": 382 }, { "epoch": 0.1526352495765667, "grad_norm": 0.553191736940903, "learning_rate": 3.7994206389209457e-05, "loss": 0.8516, "step": 383 }, { "epoch": 0.1530337750323802, "grad_norm": 0.5178209878197958, "learning_rate": 3.7983146754205866e-05, "loss": 0.8759, "step": 384 }, { "epoch": 0.15343230048819367, "grad_norm": 0.5241403248580444, "learning_rate": 3.7972058331172935e-05, "loss": 0.9084, "step": 385 }, { "epoch": 0.15383082594400718, "grad_norm": 0.4871129484635027, "learning_rate": 3.796094113786137e-05, "loss": 0.886, "step": 386 }, { "epoch": 0.15422935139982066, "grad_norm": 0.43638582131414316, "learning_rate": 3.794979519206796e-05, "loss": 0.8884, "step": 387 }, { "epoch": 0.15462787685563414, "grad_norm": 0.4833333706695009, "learning_rate": 3.793862051163551e-05, "loss": 0.8911, "step": 388 }, { "epoch": 0.15502640231144765, "grad_norm": 0.5314502365145202, "learning_rate": 3.792741711445283e-05, "loss": 0.9347, "step": 389 }, { "epoch": 0.15542492776726113, "grad_norm": 0.47578888436804323, "learning_rate": 3.791618501845469e-05, "loss": 0.8512, "step": 390 }, { "epoch": 0.15582345322307461, "grad_norm": 0.5374852434985777, "learning_rate": 3.790492424162181e-05, "loss": 0.8765, "step": 391 }, { "epoch": 0.15622197867888812, "grad_norm": 0.568861342025691, "learning_rate": 3.789363480198083e-05, "loss": 0.88, "step": 392 }, { "epoch": 0.1566205041347016, "grad_norm": 0.5082814585192399, "learning_rate": 3.788231671760426e-05, "loss": 0.8846, "step": 393 }, { "epoch": 0.15701902959051509, "grad_norm": 0.5514304292988225, "learning_rate": 3.787097000661047e-05, "loss": 0.9023, "step": 394 }, { "epoch": 0.1574175550463286, "grad_norm": 0.5203382428096642, "learning_rate": 3.785959468716367e-05, "loss": 0.9036, "step": 395 }, { "epoch": 0.15781608050214208, "grad_norm": 0.43118668216324796, "learning_rate": 3.7848190777473836e-05, "loss": 0.8952, "step": 396 }, { "epoch": 0.15821460595795556, "grad_norm": 0.4912071245587214, "learning_rate": 3.783675829579675e-05, "loss": 0.8798, "step": 397 }, { "epoch": 0.15861313141376907, "grad_norm": 0.5961696064294701, "learning_rate": 3.7825297260433904e-05, "loss": 0.8888, "step": 398 }, { "epoch": 0.15901165686958255, "grad_norm": 0.7191150184982619, "learning_rate": 3.781380768973252e-05, "loss": 0.9002, "step": 399 }, { "epoch": 0.15941018232539603, "grad_norm": 0.7060067375415279, "learning_rate": 3.7802289602085485e-05, "loss": 0.8741, "step": 400 }, { "epoch": 0.15980870778120954, "grad_norm": 0.5469078244459111, "learning_rate": 3.779074301593135e-05, "loss": 0.8786, "step": 401 }, { "epoch": 0.16020723323702302, "grad_norm": 0.4518738436666743, "learning_rate": 3.777916794975428e-05, "loss": 0.8641, "step": 402 }, { "epoch": 0.1606057586928365, "grad_norm": 0.7446776049733693, "learning_rate": 3.776756442208402e-05, "loss": 0.8841, "step": 403 }, { "epoch": 0.16100428414865, "grad_norm": 0.8590281212461937, "learning_rate": 3.7755932451495906e-05, "loss": 0.8589, "step": 404 }, { "epoch": 0.1614028096044635, "grad_norm": 0.8179740795657136, "learning_rate": 3.774427205661077e-05, "loss": 0.8997, "step": 405 }, { "epoch": 0.16180133506027697, "grad_norm": 0.6554445877560577, "learning_rate": 3.773258325609499e-05, "loss": 0.8686, "step": 406 }, { "epoch": 0.16219986051609048, "grad_norm": 0.5244424483306168, "learning_rate": 3.7720866068660376e-05, "loss": 0.8705, "step": 407 }, { "epoch": 0.16259838597190396, "grad_norm": 0.5471724085897548, "learning_rate": 3.7709120513064196e-05, "loss": 0.8629, "step": 408 }, { "epoch": 0.16299691142771744, "grad_norm": 0.6834100949875108, "learning_rate": 3.769734660810915e-05, "loss": 0.8863, "step": 409 }, { "epoch": 0.16339543688353095, "grad_norm": 0.7279947229048482, "learning_rate": 3.768554437264329e-05, "loss": 0.8666, "step": 410 }, { "epoch": 0.16379396233934443, "grad_norm": 0.6176989230226226, "learning_rate": 3.767371382556003e-05, "loss": 0.8537, "step": 411 }, { "epoch": 0.1641924877951579, "grad_norm": 0.4903712989166882, "learning_rate": 3.766185498579813e-05, "loss": 0.903, "step": 412 }, { "epoch": 0.16459101325097142, "grad_norm": 0.552748741724315, "learning_rate": 3.76499678723416e-05, "loss": 0.8765, "step": 413 }, { "epoch": 0.1649895387067849, "grad_norm": 0.6272889269130209, "learning_rate": 3.763805250421974e-05, "loss": 0.8738, "step": 414 }, { "epoch": 0.16538806416259838, "grad_norm": 0.5264119048766897, "learning_rate": 3.762610890050707e-05, "loss": 0.8776, "step": 415 }, { "epoch": 0.1657865896184119, "grad_norm": 0.5169756029407534, "learning_rate": 3.761413708032332e-05, "loss": 0.9039, "step": 416 }, { "epoch": 0.16618511507422537, "grad_norm": 0.5970794940209743, "learning_rate": 3.760213706283339e-05, "loss": 0.9157, "step": 417 }, { "epoch": 0.16658364053003885, "grad_norm": 0.5978586824697808, "learning_rate": 3.759010886724731e-05, "loss": 0.8627, "step": 418 }, { "epoch": 0.16698216598585236, "grad_norm": 0.6350014516716387, "learning_rate": 3.757805251282021e-05, "loss": 0.8924, "step": 419 }, { "epoch": 0.16738069144166584, "grad_norm": 0.4788632516360886, "learning_rate": 3.756596801885232e-05, "loss": 0.8823, "step": 420 }, { "epoch": 0.16777921689747932, "grad_norm": 0.4586359434458119, "learning_rate": 3.755385540468892e-05, "loss": 0.8929, "step": 421 }, { "epoch": 0.1681777423532928, "grad_norm": 0.44317101728143243, "learning_rate": 3.7541714689720265e-05, "loss": 0.8649, "step": 422 }, { "epoch": 0.1685762678091063, "grad_norm": 0.5122716359415467, "learning_rate": 3.7529545893381645e-05, "loss": 0.853, "step": 423 }, { "epoch": 0.1689747932649198, "grad_norm": 0.5459289409614204, "learning_rate": 3.7517349035153265e-05, "loss": 0.884, "step": 424 }, { "epoch": 0.16937331872073327, "grad_norm": 0.5242102541749672, "learning_rate": 3.750512413456027e-05, "loss": 0.8657, "step": 425 }, { "epoch": 0.16977184417654678, "grad_norm": 0.4867591923017328, "learning_rate": 3.749287121117271e-05, "loss": 0.8792, "step": 426 }, { "epoch": 0.17017036963236026, "grad_norm": 0.46645737295772005, "learning_rate": 3.7480590284605456e-05, "loss": 0.8555, "step": 427 }, { "epoch": 0.17056889508817374, "grad_norm": 0.5173979998559967, "learning_rate": 3.746828137451825e-05, "loss": 0.8767, "step": 428 }, { "epoch": 0.17096742054398725, "grad_norm": 0.5369165613294684, "learning_rate": 3.74559445006156e-05, "loss": 0.8705, "step": 429 }, { "epoch": 0.17136594599980073, "grad_norm": 0.5189321766211082, "learning_rate": 3.74435796826468e-05, "loss": 0.8903, "step": 430 }, { "epoch": 0.17176447145561421, "grad_norm": 0.5153398576442575, "learning_rate": 3.743118694040585e-05, "loss": 0.856, "step": 431 }, { "epoch": 0.17216299691142772, "grad_norm": 0.6454497262759452, "learning_rate": 3.74187662937315e-05, "loss": 0.9, "step": 432 }, { "epoch": 0.1725615223672412, "grad_norm": 0.49986119364421433, "learning_rate": 3.740631776250712e-05, "loss": 0.8445, "step": 433 }, { "epoch": 0.17296004782305469, "grad_norm": 0.48967274132042343, "learning_rate": 3.7393841366660735e-05, "loss": 0.8767, "step": 434 }, { "epoch": 0.1733585732788682, "grad_norm": 0.45785208420296847, "learning_rate": 3.7381337126165e-05, "loss": 0.9046, "step": 435 }, { "epoch": 0.17375709873468168, "grad_norm": 0.5084392551993347, "learning_rate": 3.736880506103711e-05, "loss": 0.8463, "step": 436 }, { "epoch": 0.17415562419049516, "grad_norm": 0.6260870917802238, "learning_rate": 3.735624519133883e-05, "loss": 0.8526, "step": 437 }, { "epoch": 0.17455414964630867, "grad_norm": 0.667002011430546, "learning_rate": 3.734365753717642e-05, "loss": 0.9163, "step": 438 }, { "epoch": 0.17495267510212215, "grad_norm": 0.5524932335618813, "learning_rate": 3.7331042118700616e-05, "loss": 0.8909, "step": 439 }, { "epoch": 0.17535120055793563, "grad_norm": 0.5179221999500747, "learning_rate": 3.731839895610662e-05, "loss": 0.8491, "step": 440 }, { "epoch": 0.17574972601374914, "grad_norm": 0.6055468639799181, "learning_rate": 3.7305728069634024e-05, "loss": 0.9039, "step": 441 }, { "epoch": 0.17614825146956262, "grad_norm": 0.6369378504491895, "learning_rate": 3.729302947956681e-05, "loss": 0.8699, "step": 442 }, { "epoch": 0.1765467769253761, "grad_norm": 0.517132348583334, "learning_rate": 3.728030320623332e-05, "loss": 0.8747, "step": 443 }, { "epoch": 0.1769453023811896, "grad_norm": 0.4377714733389691, "learning_rate": 3.7267549270006195e-05, "loss": 0.8574, "step": 444 }, { "epoch": 0.1773438278370031, "grad_norm": 0.5519428657517451, "learning_rate": 3.7254767691302366e-05, "loss": 0.8716, "step": 445 }, { "epoch": 0.17774235329281657, "grad_norm": 0.5779289605769454, "learning_rate": 3.724195849058302e-05, "loss": 0.855, "step": 446 }, { "epoch": 0.17814087874863008, "grad_norm": 0.5189071675619338, "learning_rate": 3.722912168835356e-05, "loss": 0.8789, "step": 447 }, { "epoch": 0.17853940420444356, "grad_norm": 0.44907580503791095, "learning_rate": 3.7216257305163576e-05, "loss": 0.8659, "step": 448 }, { "epoch": 0.17893792966025704, "grad_norm": 0.5335537287232798, "learning_rate": 3.7203365361606796e-05, "loss": 0.896, "step": 449 }, { "epoch": 0.17933645511607055, "grad_norm": 0.5493861171202665, "learning_rate": 3.719044587832109e-05, "loss": 0.8547, "step": 450 }, { "epoch": 0.17973498057188403, "grad_norm": 0.4686748664722927, "learning_rate": 3.71774988759884e-05, "loss": 0.8288, "step": 451 }, { "epoch": 0.1801335060276975, "grad_norm": 0.4149387142024727, "learning_rate": 3.716452437533471e-05, "loss": 0.8596, "step": 452 }, { "epoch": 0.18053203148351102, "grad_norm": 0.4325334501517392, "learning_rate": 3.715152239713007e-05, "loss": 0.859, "step": 453 }, { "epoch": 0.1809305569393245, "grad_norm": 0.4976629397106674, "learning_rate": 3.713849296218847e-05, "loss": 0.8789, "step": 454 }, { "epoch": 0.18132908239513798, "grad_norm": 0.507007279338876, "learning_rate": 3.7125436091367866e-05, "loss": 0.8726, "step": 455 }, { "epoch": 0.1817276078509515, "grad_norm": 0.5348993862470603, "learning_rate": 3.711235180557014e-05, "loss": 0.9106, "step": 456 }, { "epoch": 0.18212613330676497, "grad_norm": 0.46294587476217225, "learning_rate": 3.709924012574107e-05, "loss": 0.8358, "step": 457 }, { "epoch": 0.18252465876257845, "grad_norm": 0.43107837967105883, "learning_rate": 3.708610107287026e-05, "loss": 0.8448, "step": 458 }, { "epoch": 0.18292318421839196, "grad_norm": 0.48433441169264524, "learning_rate": 3.7072934667991157e-05, "loss": 0.8677, "step": 459 }, { "epoch": 0.18332170967420544, "grad_norm": 0.5181824793139834, "learning_rate": 3.705974093218099e-05, "loss": 0.8867, "step": 460 }, { "epoch": 0.18372023513001892, "grad_norm": 0.5376360855846708, "learning_rate": 3.704651988656074e-05, "loss": 0.9073, "step": 461 }, { "epoch": 0.18411876058583243, "grad_norm": 0.5000814848716162, "learning_rate": 3.703327155229509e-05, "loss": 0.87, "step": 462 }, { "epoch": 0.1845172860416459, "grad_norm": 0.4780561422951961, "learning_rate": 3.701999595059244e-05, "loss": 0.8614, "step": 463 }, { "epoch": 0.1849158114974594, "grad_norm": 0.4722288774763096, "learning_rate": 3.700669310270481e-05, "loss": 0.8507, "step": 464 }, { "epoch": 0.1853143369532729, "grad_norm": 0.46238619081900495, "learning_rate": 3.699336302992786e-05, "loss": 0.8795, "step": 465 }, { "epoch": 0.18571286240908638, "grad_norm": 0.5217809598476334, "learning_rate": 3.69800057536008e-05, "loss": 0.8679, "step": 466 }, { "epoch": 0.18611138786489986, "grad_norm": 0.5670490274865951, "learning_rate": 3.6966621295106425e-05, "loss": 0.8821, "step": 467 }, { "epoch": 0.18650991332071337, "grad_norm": 0.5541701975380785, "learning_rate": 3.695320967587103e-05, "loss": 0.8671, "step": 468 }, { "epoch": 0.18690843877652685, "grad_norm": 0.48332966121728094, "learning_rate": 3.693977091736438e-05, "loss": 0.8543, "step": 469 }, { "epoch": 0.18730696423234033, "grad_norm": 0.4228426707268364, "learning_rate": 3.6926305041099705e-05, "loss": 0.8421, "step": 470 }, { "epoch": 0.18770548968815384, "grad_norm": 0.4683111306073849, "learning_rate": 3.6912812068633626e-05, "loss": 0.8584, "step": 471 }, { "epoch": 0.18810401514396732, "grad_norm": 0.5422991697909932, "learning_rate": 3.689929202156615e-05, "loss": 0.9349, "step": 472 }, { "epoch": 0.1885025405997808, "grad_norm": 0.474589914149524, "learning_rate": 3.688574492154063e-05, "loss": 0.8683, "step": 473 }, { "epoch": 0.1889010660555943, "grad_norm": 0.4982233301174737, "learning_rate": 3.687217079024371e-05, "loss": 0.8636, "step": 474 }, { "epoch": 0.1892995915114078, "grad_norm": 0.5267276262142256, "learning_rate": 3.6858569649405336e-05, "loss": 0.8559, "step": 475 }, { "epoch": 0.18969811696722128, "grad_norm": 0.40458583321271047, "learning_rate": 3.6844941520798664e-05, "loss": 0.8432, "step": 476 }, { "epoch": 0.19009664242303478, "grad_norm": 0.38424753205506557, "learning_rate": 3.683128642624007e-05, "loss": 0.857, "step": 477 }, { "epoch": 0.19049516787884826, "grad_norm": 0.4997565524770705, "learning_rate": 3.6817604387589086e-05, "loss": 0.8763, "step": 478 }, { "epoch": 0.19089369333466175, "grad_norm": 0.38620309944213566, "learning_rate": 3.680389542674837e-05, "loss": 0.8402, "step": 479 }, { "epoch": 0.19129221879047525, "grad_norm": 0.4217979959268514, "learning_rate": 3.679015956566371e-05, "loss": 0.8921, "step": 480 }, { "epoch": 0.19169074424628874, "grad_norm": 0.6509327369251123, "learning_rate": 3.6776396826323925e-05, "loss": 0.8981, "step": 481 }, { "epoch": 0.19208926970210222, "grad_norm": 0.3957479519147936, "learning_rate": 3.6762607230760884e-05, "loss": 0.887, "step": 482 }, { "epoch": 0.19248779515791573, "grad_norm": 0.3933212374183316, "learning_rate": 3.6748790801049435e-05, "loss": 0.8555, "step": 483 }, { "epoch": 0.1928863206137292, "grad_norm": 0.3942675959179187, "learning_rate": 3.673494755930737e-05, "loss": 0.8619, "step": 484 }, { "epoch": 0.1932848460695427, "grad_norm": 0.4102773938392307, "learning_rate": 3.6721077527695435e-05, "loss": 0.8684, "step": 485 }, { "epoch": 0.19368337152535617, "grad_norm": 0.5537091771770686, "learning_rate": 3.670718072841724e-05, "loss": 0.8657, "step": 486 }, { "epoch": 0.19408189698116968, "grad_norm": 0.4445425000622428, "learning_rate": 3.6693257183719256e-05, "loss": 0.8527, "step": 487 }, { "epoch": 0.19448042243698316, "grad_norm": 0.4048218822376927, "learning_rate": 3.667930691589075e-05, "loss": 0.8786, "step": 488 }, { "epoch": 0.19487894789279664, "grad_norm": 0.4525605726219098, "learning_rate": 3.666532994726381e-05, "loss": 0.8544, "step": 489 }, { "epoch": 0.19527747334861015, "grad_norm": 0.4471569397505119, "learning_rate": 3.665132630021321e-05, "loss": 0.8506, "step": 490 }, { "epoch": 0.19567599880442363, "grad_norm": 0.47638751339784896, "learning_rate": 3.6637295997156475e-05, "loss": 0.887, "step": 491 }, { "epoch": 0.1960745242602371, "grad_norm": 0.5025010909937182, "learning_rate": 3.662323906055379e-05, "loss": 0.8653, "step": 492 }, { "epoch": 0.19647304971605062, "grad_norm": 0.42315599557494776, "learning_rate": 3.6609155512907966e-05, "loss": 0.8531, "step": 493 }, { "epoch": 0.1968715751718641, "grad_norm": 0.4543478055892151, "learning_rate": 3.659504537676444e-05, "loss": 0.8512, "step": 494 }, { "epoch": 0.19727010062767758, "grad_norm": 0.5089668275890759, "learning_rate": 3.658090867471118e-05, "loss": 0.8733, "step": 495 }, { "epoch": 0.1976686260834911, "grad_norm": 0.48725887709055965, "learning_rate": 3.656674542937869e-05, "loss": 0.8629, "step": 496 }, { "epoch": 0.19806715153930457, "grad_norm": 0.5284757567578545, "learning_rate": 3.655255566343999e-05, "loss": 0.8845, "step": 497 }, { "epoch": 0.19846567699511805, "grad_norm": 0.5026058309669479, "learning_rate": 3.653833939961053e-05, "loss": 0.8876, "step": 498 }, { "epoch": 0.19886420245093156, "grad_norm": 0.4169989456283724, "learning_rate": 3.6524096660648186e-05, "loss": 0.8713, "step": 499 }, { "epoch": 0.19926272790674504, "grad_norm": 0.43259318912302097, "learning_rate": 3.650982746935321e-05, "loss": 0.8463, "step": 500 }, { "epoch": 0.19966125336255852, "grad_norm": 0.47387386790106595, "learning_rate": 3.6495531848568206e-05, "loss": 0.8315, "step": 501 }, { "epoch": 0.20005977881837203, "grad_norm": 0.4501351523826911, "learning_rate": 3.6481209821178104e-05, "loss": 0.8628, "step": 502 }, { "epoch": 0.2004583042741855, "grad_norm": 0.5285961857854481, "learning_rate": 3.646686141011008e-05, "loss": 0.8605, "step": 503 }, { "epoch": 0.200856829729999, "grad_norm": 0.40989354815942786, "learning_rate": 3.645248663833354e-05, "loss": 0.8688, "step": 504 }, { "epoch": 0.2012553551858125, "grad_norm": 0.4346950335335224, "learning_rate": 3.643808552886012e-05, "loss": 0.873, "step": 505 }, { "epoch": 0.20165388064162598, "grad_norm": 0.5336085053270726, "learning_rate": 3.6423658104743606e-05, "loss": 0.8593, "step": 506 }, { "epoch": 0.20205240609743946, "grad_norm": 0.4077411294947737, "learning_rate": 3.6409204389079896e-05, "loss": 0.8444, "step": 507 }, { "epoch": 0.20245093155325297, "grad_norm": 0.44445720308169706, "learning_rate": 3.6394724405007e-05, "loss": 0.8636, "step": 508 }, { "epoch": 0.20284945700906645, "grad_norm": 0.3987179137110336, "learning_rate": 3.6380218175704954e-05, "loss": 0.8897, "step": 509 }, { "epoch": 0.20324798246487993, "grad_norm": 0.4428095828153124, "learning_rate": 3.636568572439582e-05, "loss": 0.8471, "step": 510 }, { "epoch": 0.20364650792069344, "grad_norm": 0.46898791636388926, "learning_rate": 3.6351127074343654e-05, "loss": 0.8567, "step": 511 }, { "epoch": 0.20404503337650692, "grad_norm": 0.44474651154582173, "learning_rate": 3.633654224885441e-05, "loss": 0.848, "step": 512 }, { "epoch": 0.2044435588323204, "grad_norm": 0.4312423546670495, "learning_rate": 3.632193127127598e-05, "loss": 0.8693, "step": 513 }, { "epoch": 0.2048420842881339, "grad_norm": 0.49632782286130483, "learning_rate": 3.630729416499813e-05, "loss": 0.8814, "step": 514 }, { "epoch": 0.2052406097439474, "grad_norm": 0.45170716058550536, "learning_rate": 3.6292630953452406e-05, "loss": 0.8685, "step": 515 }, { "epoch": 0.20563913519976088, "grad_norm": 0.5446213353134834, "learning_rate": 3.627794166011219e-05, "loss": 0.8717, "step": 516 }, { "epoch": 0.20603766065557438, "grad_norm": 0.5471560197738125, "learning_rate": 3.626322630849259e-05, "loss": 0.8667, "step": 517 }, { "epoch": 0.20643618611138786, "grad_norm": 0.5858086900062635, "learning_rate": 3.6248484922150445e-05, "loss": 0.8279, "step": 518 }, { "epoch": 0.20683471156720135, "grad_norm": 0.5915507808065805, "learning_rate": 3.6233717524684264e-05, "loss": 0.8647, "step": 519 }, { "epoch": 0.20723323702301485, "grad_norm": 0.5742838245899272, "learning_rate": 3.62189241397342e-05, "loss": 0.8756, "step": 520 }, { "epoch": 0.20763176247882834, "grad_norm": 0.4770900993779875, "learning_rate": 3.620410479098199e-05, "loss": 0.8595, "step": 521 }, { "epoch": 0.20803028793464182, "grad_norm": 0.4639336066600716, "learning_rate": 3.618925950215096e-05, "loss": 0.8539, "step": 522 }, { "epoch": 0.20842881339045533, "grad_norm": 0.5019882836143528, "learning_rate": 3.617438829700595e-05, "loss": 0.8461, "step": 523 }, { "epoch": 0.2088273388462688, "grad_norm": 0.4562491167280308, "learning_rate": 3.615949119935328e-05, "loss": 0.8631, "step": 524 }, { "epoch": 0.2092258643020823, "grad_norm": 0.46086677639660656, "learning_rate": 3.614456823304073e-05, "loss": 0.8489, "step": 525 }, { "epoch": 0.2096243897578958, "grad_norm": 0.44996342982439314, "learning_rate": 3.61296194219575e-05, "loss": 0.8554, "step": 526 }, { "epoch": 0.21002291521370928, "grad_norm": 0.4156003055691938, "learning_rate": 3.6114644790034144e-05, "loss": 0.8566, "step": 527 }, { "epoch": 0.21042144066952276, "grad_norm": 0.4501085849731328, "learning_rate": 3.609964436124255e-05, "loss": 0.8728, "step": 528 }, { "epoch": 0.21081996612533627, "grad_norm": 0.40787146977289557, "learning_rate": 3.6084618159595935e-05, "loss": 0.8667, "step": 529 }, { "epoch": 0.21121849158114975, "grad_norm": 0.474878191977019, "learning_rate": 3.606956620914873e-05, "loss": 0.8295, "step": 530 }, { "epoch": 0.21161701703696323, "grad_norm": 0.46121373114207476, "learning_rate": 3.605448853399661e-05, "loss": 0.8647, "step": 531 }, { "epoch": 0.21201554249277674, "grad_norm": 0.5256057649499315, "learning_rate": 3.603938515827643e-05, "loss": 0.8765, "step": 532 }, { "epoch": 0.21241406794859022, "grad_norm": 0.4296063955695742, "learning_rate": 3.6024256106166194e-05, "loss": 0.8698, "step": 533 }, { "epoch": 0.2128125934044037, "grad_norm": 0.49055349825343775, "learning_rate": 3.600910140188498e-05, "loss": 0.8554, "step": 534 }, { "epoch": 0.2132111188602172, "grad_norm": 0.4028038490785686, "learning_rate": 3.599392106969296e-05, "loss": 0.8797, "step": 535 }, { "epoch": 0.2136096443160307, "grad_norm": 0.4426507424773926, "learning_rate": 3.5978715133891334e-05, "loss": 0.8433, "step": 536 }, { "epoch": 0.21400816977184417, "grad_norm": 0.4408887572324347, "learning_rate": 3.596348361882226e-05, "loss": 0.8919, "step": 537 }, { "epoch": 0.21440669522765768, "grad_norm": 0.4023818298390077, "learning_rate": 3.594822654886888e-05, "loss": 0.8219, "step": 538 }, { "epoch": 0.21480522068347116, "grad_norm": 0.5445602848649418, "learning_rate": 3.593294394845521e-05, "loss": 0.8561, "step": 539 }, { "epoch": 0.21520374613928464, "grad_norm": 0.44164972512016026, "learning_rate": 3.5917635842046165e-05, "loss": 0.8428, "step": 540 }, { "epoch": 0.21560227159509815, "grad_norm": 0.48977170056676267, "learning_rate": 3.590230225414748e-05, "loss": 0.8701, "step": 541 }, { "epoch": 0.21600079705091163, "grad_norm": 0.465180272328864, "learning_rate": 3.588694320930567e-05, "loss": 0.837, "step": 542 }, { "epoch": 0.2163993225067251, "grad_norm": 0.3718782369142703, "learning_rate": 3.5871558732108034e-05, "loss": 0.8491, "step": 543 }, { "epoch": 0.21679784796253862, "grad_norm": 0.4506626708822692, "learning_rate": 3.5856148847182535e-05, "loss": 0.8293, "step": 544 }, { "epoch": 0.2171963734183521, "grad_norm": 0.5210277329620194, "learning_rate": 3.5840713579197856e-05, "loss": 0.8587, "step": 545 }, { "epoch": 0.21759489887416558, "grad_norm": 0.5358427464347824, "learning_rate": 3.5825252952863296e-05, "loss": 0.8251, "step": 546 }, { "epoch": 0.2179934243299791, "grad_norm": 0.48542122022372863, "learning_rate": 3.5809766992928746e-05, "loss": 0.8725, "step": 547 }, { "epoch": 0.21839194978579257, "grad_norm": 0.4243230434228638, "learning_rate": 3.579425572418465e-05, "loss": 0.8518, "step": 548 }, { "epoch": 0.21879047524160605, "grad_norm": 0.4218795984129036, "learning_rate": 3.5778719171461975e-05, "loss": 0.8548, "step": 549 }, { "epoch": 0.21918900069741956, "grad_norm": 0.4590501106129811, "learning_rate": 3.5763157359632164e-05, "loss": 0.8531, "step": 550 }, { "epoch": 0.21958752615323304, "grad_norm": 0.46361499771905873, "learning_rate": 3.574757031360708e-05, "loss": 0.8817, "step": 551 }, { "epoch": 0.21998605160904652, "grad_norm": 0.47793550002117074, "learning_rate": 3.5731958058339e-05, "loss": 0.856, "step": 552 }, { "epoch": 0.22038457706486, "grad_norm": 0.4585859943216561, "learning_rate": 3.571632061882056e-05, "loss": 0.8616, "step": 553 }, { "epoch": 0.2207831025206735, "grad_norm": 0.3864454910550978, "learning_rate": 3.570065802008468e-05, "loss": 0.8621, "step": 554 }, { "epoch": 0.221181627976487, "grad_norm": 0.42677803227423167, "learning_rate": 3.56849702872046e-05, "loss": 0.8824, "step": 555 }, { "epoch": 0.22158015343230048, "grad_norm": 0.4968788156141536, "learning_rate": 3.5669257445293755e-05, "loss": 0.8601, "step": 556 }, { "epoch": 0.22197867888811398, "grad_norm": 0.4839933541994568, "learning_rate": 3.5653519519505803e-05, "loss": 0.852, "step": 557 }, { "epoch": 0.22237720434392746, "grad_norm": 0.43272119648953283, "learning_rate": 3.563775653503455e-05, "loss": 0.8733, "step": 558 }, { "epoch": 0.22277572979974095, "grad_norm": 0.39605938751897557, "learning_rate": 3.562196851711391e-05, "loss": 0.8417, "step": 559 }, { "epoch": 0.22317425525555445, "grad_norm": 0.43460908962065953, "learning_rate": 3.560615549101788e-05, "loss": 0.8443, "step": 560 }, { "epoch": 0.22357278071136794, "grad_norm": 0.49038667322845025, "learning_rate": 3.5590317482060474e-05, "loss": 0.8441, "step": 561 }, { "epoch": 0.22397130616718142, "grad_norm": 0.4896634258033811, "learning_rate": 3.5574454515595735e-05, "loss": 0.8216, "step": 562 }, { "epoch": 0.22436983162299493, "grad_norm": 0.47379532355614734, "learning_rate": 3.5558566617017616e-05, "loss": 0.8664, "step": 563 }, { "epoch": 0.2247683570788084, "grad_norm": 0.4064914983245694, "learning_rate": 3.554265381176e-05, "loss": 0.8195, "step": 564 }, { "epoch": 0.2251668825346219, "grad_norm": 0.44817812465361634, "learning_rate": 3.552671612529667e-05, "loss": 0.8251, "step": 565 }, { "epoch": 0.2255654079904354, "grad_norm": 0.5252162424970518, "learning_rate": 3.5510753583141185e-05, "loss": 0.8873, "step": 566 }, { "epoch": 0.22596393344624888, "grad_norm": 0.5355671371355674, "learning_rate": 3.5494766210846936e-05, "loss": 0.8544, "step": 567 }, { "epoch": 0.22636245890206236, "grad_norm": 0.4819586808295284, "learning_rate": 3.547875403400705e-05, "loss": 0.8619, "step": 568 }, { "epoch": 0.22676098435787587, "grad_norm": 0.3961215921893707, "learning_rate": 3.5462717078254353e-05, "loss": 0.8687, "step": 569 }, { "epoch": 0.22715950981368935, "grad_norm": 0.4108394698195708, "learning_rate": 3.5446655369261355e-05, "loss": 0.8629, "step": 570 }, { "epoch": 0.22755803526950283, "grad_norm": 0.4534157567866205, "learning_rate": 3.543056893274017e-05, "loss": 0.843, "step": 571 }, { "epoch": 0.22795656072531634, "grad_norm": 0.5102875270779772, "learning_rate": 3.541445779444252e-05, "loss": 0.8485, "step": 572 }, { "epoch": 0.22835508618112982, "grad_norm": 0.3859177522136378, "learning_rate": 3.5398321980159666e-05, "loss": 0.8373, "step": 573 }, { "epoch": 0.2287536116369433, "grad_norm": 0.37972963850475683, "learning_rate": 3.5382161515722354e-05, "loss": 0.8741, "step": 574 }, { "epoch": 0.2291521370927568, "grad_norm": 0.4136228500070505, "learning_rate": 3.53659764270008e-05, "loss": 0.8739, "step": 575 }, { "epoch": 0.2295506625485703, "grad_norm": 0.42386454317477146, "learning_rate": 3.534976673990465e-05, "loss": 0.8504, "step": 576 }, { "epoch": 0.22994918800438377, "grad_norm": 0.4131700773814348, "learning_rate": 3.5333532480382915e-05, "loss": 0.8325, "step": 577 }, { "epoch": 0.23034771346019728, "grad_norm": 0.4321055766938808, "learning_rate": 3.5317273674423944e-05, "loss": 0.842, "step": 578 }, { "epoch": 0.23074623891601076, "grad_norm": 0.4452054733522704, "learning_rate": 3.5300990348055385e-05, "loss": 0.8826, "step": 579 }, { "epoch": 0.23114476437182424, "grad_norm": 0.4096599637303119, "learning_rate": 3.528468252734414e-05, "loss": 0.8633, "step": 580 }, { "epoch": 0.23154328982763775, "grad_norm": 0.4122953744704833, "learning_rate": 3.526835023839632e-05, "loss": 0.8772, "step": 581 }, { "epoch": 0.23194181528345123, "grad_norm": 0.4547152883012281, "learning_rate": 3.52519935073572e-05, "loss": 0.8613, "step": 582 }, { "epoch": 0.2323403407392647, "grad_norm": 0.4725670891982683, "learning_rate": 3.5235612360411196e-05, "loss": 0.8819, "step": 583 }, { "epoch": 0.23273886619507822, "grad_norm": 0.40729982125282965, "learning_rate": 3.521920682378179e-05, "loss": 0.8471, "step": 584 }, { "epoch": 0.2331373916508917, "grad_norm": 0.4348949494906739, "learning_rate": 3.520277692373154e-05, "loss": 0.8682, "step": 585 }, { "epoch": 0.23353591710670518, "grad_norm": 0.4881551767292844, "learning_rate": 3.518632268656196e-05, "loss": 0.8408, "step": 586 }, { "epoch": 0.2339344425625187, "grad_norm": 0.5373093582603797, "learning_rate": 3.516984413861357e-05, "loss": 0.8646, "step": 587 }, { "epoch": 0.23433296801833217, "grad_norm": 0.4789730876955116, "learning_rate": 3.5153341306265775e-05, "loss": 0.8489, "step": 588 }, { "epoch": 0.23473149347414565, "grad_norm": 0.5957598632234159, "learning_rate": 3.5136814215936864e-05, "loss": 0.8478, "step": 589 }, { "epoch": 0.23513001892995916, "grad_norm": 0.6296888663536283, "learning_rate": 3.512026289408398e-05, "loss": 0.866, "step": 590 }, { "epoch": 0.23552854438577264, "grad_norm": 0.5086372892787441, "learning_rate": 3.5103687367203025e-05, "loss": 0.8893, "step": 591 }, { "epoch": 0.23592706984158612, "grad_norm": 0.4732493082235356, "learning_rate": 3.508708766182866e-05, "loss": 0.8435, "step": 592 }, { "epoch": 0.23632559529739963, "grad_norm": 0.5470935688327907, "learning_rate": 3.507046380453426e-05, "loss": 0.8572, "step": 593 }, { "epoch": 0.2367241207532131, "grad_norm": 0.5928161194589755, "learning_rate": 3.5053815821931865e-05, "loss": 0.8991, "step": 594 }, { "epoch": 0.2371226462090266, "grad_norm": 0.541542878613048, "learning_rate": 3.503714374067212e-05, "loss": 0.843, "step": 595 }, { "epoch": 0.2375211716648401, "grad_norm": 0.5945037136372829, "learning_rate": 3.502044758744425e-05, "loss": 0.8313, "step": 596 }, { "epoch": 0.23791969712065358, "grad_norm": 0.5262860873148738, "learning_rate": 3.500372738897603e-05, "loss": 0.8302, "step": 597 }, { "epoch": 0.23831822257646706, "grad_norm": 0.47043036160591684, "learning_rate": 3.498698317203372e-05, "loss": 0.8483, "step": 598 }, { "epoch": 0.23871674803228057, "grad_norm": 0.48587903119210246, "learning_rate": 3.497021496342203e-05, "loss": 0.8435, "step": 599 }, { "epoch": 0.23911527348809405, "grad_norm": 0.5044732980078849, "learning_rate": 3.495342278998406e-05, "loss": 0.828, "step": 600 }, { "epoch": 0.23951379894390754, "grad_norm": 0.4739216086634541, "learning_rate": 3.493660667860131e-05, "loss": 0.8077, "step": 601 }, { "epoch": 0.23991232439972104, "grad_norm": 0.5102507150713297, "learning_rate": 3.4919766656193576e-05, "loss": 0.8558, "step": 602 }, { "epoch": 0.24031084985553453, "grad_norm": 0.4940960518342556, "learning_rate": 3.490290274971892e-05, "loss": 0.8655, "step": 603 }, { "epoch": 0.240709375311348, "grad_norm": 0.4976450093350724, "learning_rate": 3.488601498617367e-05, "loss": 0.8451, "step": 604 }, { "epoch": 0.24110790076716151, "grad_norm": 0.5068077897232314, "learning_rate": 3.486910339259231e-05, "loss": 0.8424, "step": 605 }, { "epoch": 0.241506426222975, "grad_norm": 0.40217075451363676, "learning_rate": 3.485216799604752e-05, "loss": 0.8766, "step": 606 }, { "epoch": 0.24190495167878848, "grad_norm": 0.41405398864625936, "learning_rate": 3.483520882365003e-05, "loss": 0.8295, "step": 607 }, { "epoch": 0.24230347713460199, "grad_norm": 0.45479094126766634, "learning_rate": 3.4818225902548666e-05, "loss": 0.8832, "step": 608 }, { "epoch": 0.24270200259041547, "grad_norm": 0.44930048442037135, "learning_rate": 3.480121925993026e-05, "loss": 0.8775, "step": 609 }, { "epoch": 0.24310052804622895, "grad_norm": 0.4305314388039683, "learning_rate": 3.478418892301962e-05, "loss": 0.8585, "step": 610 }, { "epoch": 0.24349905350204246, "grad_norm": 0.42635507279318796, "learning_rate": 3.47671349190795e-05, "loss": 0.8748, "step": 611 }, { "epoch": 0.24389757895785594, "grad_norm": 0.4470652779000305, "learning_rate": 3.475005727541049e-05, "loss": 0.8707, "step": 612 }, { "epoch": 0.24429610441366942, "grad_norm": 0.6860613528881833, "learning_rate": 3.4732956019351105e-05, "loss": 0.8586, "step": 613 }, { "epoch": 0.24469462986948293, "grad_norm": 0.4514815132734232, "learning_rate": 3.471583117827758e-05, "loss": 0.847, "step": 614 }, { "epoch": 0.2450931553252964, "grad_norm": 0.405387622879431, "learning_rate": 3.469868277960395e-05, "loss": 0.8537, "step": 615 }, { "epoch": 0.2454916807811099, "grad_norm": 0.45894128089045466, "learning_rate": 3.468151085078196e-05, "loss": 0.8329, "step": 616 }, { "epoch": 0.24589020623692337, "grad_norm": 0.5102574940014621, "learning_rate": 3.4664315419301e-05, "loss": 0.8407, "step": 617 }, { "epoch": 0.24628873169273688, "grad_norm": 0.44535784146833973, "learning_rate": 3.464709651268811e-05, "loss": 0.8503, "step": 618 }, { "epoch": 0.24668725714855036, "grad_norm": 0.43055173741202407, "learning_rate": 3.4629854158507884e-05, "loss": 0.8685, "step": 619 }, { "epoch": 0.24708578260436384, "grad_norm": 0.44729573957137375, "learning_rate": 3.461258838436248e-05, "loss": 0.8708, "step": 620 }, { "epoch": 0.24748430806017735, "grad_norm": 0.4062311195130286, "learning_rate": 3.459529921789153e-05, "loss": 0.824, "step": 621 }, { "epoch": 0.24788283351599083, "grad_norm": 0.4359478505964142, "learning_rate": 3.457798668677211e-05, "loss": 0.849, "step": 622 }, { "epoch": 0.2482813589718043, "grad_norm": 0.4269566124271948, "learning_rate": 3.456065081871871e-05, "loss": 0.8504, "step": 623 }, { "epoch": 0.24867988442761782, "grad_norm": 0.39280331015093617, "learning_rate": 3.454329164148317e-05, "loss": 0.8529, "step": 624 }, { "epoch": 0.2490784098834313, "grad_norm": 0.414050219224192, "learning_rate": 3.452590918285465e-05, "loss": 0.871, "step": 625 }, { "epoch": 0.24947693533924478, "grad_norm": 0.4021318325147454, "learning_rate": 3.450850347065958e-05, "loss": 0.841, "step": 626 }, { "epoch": 0.2498754607950583, "grad_norm": 0.4120701796015395, "learning_rate": 3.4491074532761614e-05, "loss": 0.8261, "step": 627 }, { "epoch": 0.25027398625087177, "grad_norm": 0.42792903386869047, "learning_rate": 3.4473622397061576e-05, "loss": 0.8366, "step": 628 }, { "epoch": 0.25067251170668525, "grad_norm": 0.5026276371812628, "learning_rate": 3.445614709149744e-05, "loss": 0.8797, "step": 629 }, { "epoch": 0.25107103716249873, "grad_norm": 0.42307765492760363, "learning_rate": 3.443864864404427e-05, "loss": 0.8333, "step": 630 }, { "epoch": 0.25146956261831227, "grad_norm": 0.43146499355102447, "learning_rate": 3.4421127082714165e-05, "loss": 0.8745, "step": 631 }, { "epoch": 0.25186808807412575, "grad_norm": 0.4232386337048391, "learning_rate": 3.4403582435556235e-05, "loss": 0.8615, "step": 632 }, { "epoch": 0.25226661352993923, "grad_norm": 0.39549286132767947, "learning_rate": 3.4386014730656554e-05, "loss": 0.852, "step": 633 }, { "epoch": 0.2526651389857527, "grad_norm": 0.37990676255356576, "learning_rate": 3.436842399613808e-05, "loss": 0.8667, "step": 634 }, { "epoch": 0.2530636644415662, "grad_norm": 0.35437344682645827, "learning_rate": 3.435081026016067e-05, "loss": 0.8629, "step": 635 }, { "epoch": 0.2534621898973797, "grad_norm": 0.4072267228198412, "learning_rate": 3.433317355092098e-05, "loss": 0.863, "step": 636 }, { "epoch": 0.2538607153531932, "grad_norm": 0.4087915210981998, "learning_rate": 3.431551389665246e-05, "loss": 0.8629, "step": 637 }, { "epoch": 0.2542592408090067, "grad_norm": 0.39541112177531035, "learning_rate": 3.429783132562527e-05, "loss": 0.8431, "step": 638 }, { "epoch": 0.2546577662648202, "grad_norm": 0.449324447165349, "learning_rate": 3.428012586614628e-05, "loss": 0.8301, "step": 639 }, { "epoch": 0.25505629172063365, "grad_norm": 0.438103934508987, "learning_rate": 3.426239754655898e-05, "loss": 0.8346, "step": 640 }, { "epoch": 0.25545481717644714, "grad_norm": 0.4080543057741031, "learning_rate": 3.4244646395243456e-05, "loss": 0.8199, "step": 641 }, { "epoch": 0.2558533426322606, "grad_norm": 0.4781805788640452, "learning_rate": 3.422687244061636e-05, "loss": 0.8396, "step": 642 }, { "epoch": 0.25625186808807415, "grad_norm": 0.39665434242169373, "learning_rate": 3.420907571113085e-05, "loss": 0.8738, "step": 643 }, { "epoch": 0.25665039354388763, "grad_norm": 0.44427399502026793, "learning_rate": 3.419125623527651e-05, "loss": 0.8276, "step": 644 }, { "epoch": 0.2570489189997011, "grad_norm": 0.4971093826856599, "learning_rate": 3.417341404157938e-05, "loss": 0.844, "step": 645 }, { "epoch": 0.2574474444555146, "grad_norm": 0.40433006793477544, "learning_rate": 3.415554915860184e-05, "loss": 0.8515, "step": 646 }, { "epoch": 0.2578459699113281, "grad_norm": 0.41435122876017727, "learning_rate": 3.413766161494259e-05, "loss": 0.8504, "step": 647 }, { "epoch": 0.25824449536714156, "grad_norm": 0.4025721405079423, "learning_rate": 3.411975143923662e-05, "loss": 0.8003, "step": 648 }, { "epoch": 0.25864302082295504, "grad_norm": 0.4230151107223422, "learning_rate": 3.410181866015515e-05, "loss": 0.8253, "step": 649 }, { "epoch": 0.2590415462787686, "grad_norm": 0.43018219174517974, "learning_rate": 3.4083863306405576e-05, "loss": 0.8494, "step": 650 }, { "epoch": 0.25944007173458206, "grad_norm": 0.5580571782658815, "learning_rate": 3.406588540673143e-05, "loss": 0.839, "step": 651 }, { "epoch": 0.25983859719039554, "grad_norm": 0.40240838407878654, "learning_rate": 3.4047884989912355e-05, "loss": 0.8295, "step": 652 }, { "epoch": 0.260237122646209, "grad_norm": 0.42705376431218756, "learning_rate": 3.402986208476401e-05, "loss": 0.8513, "step": 653 }, { "epoch": 0.2606356481020225, "grad_norm": 0.37891252038962947, "learning_rate": 3.4011816720138076e-05, "loss": 0.8551, "step": 654 }, { "epoch": 0.261034173557836, "grad_norm": 0.4742754786354608, "learning_rate": 3.39937489249222e-05, "loss": 0.8494, "step": 655 }, { "epoch": 0.2614326990136495, "grad_norm": 0.5757481855161607, "learning_rate": 3.3975658728039894e-05, "loss": 0.866, "step": 656 }, { "epoch": 0.261831224469463, "grad_norm": 0.41879176964003356, "learning_rate": 3.395754615845057e-05, "loss": 0.8199, "step": 657 }, { "epoch": 0.2622297499252765, "grad_norm": 0.3977116381507401, "learning_rate": 3.393941124514944e-05, "loss": 0.8464, "step": 658 }, { "epoch": 0.26262827538108996, "grad_norm": 0.4361036030052378, "learning_rate": 3.3921254017167485e-05, "loss": 0.8554, "step": 659 }, { "epoch": 0.26302680083690344, "grad_norm": 0.36947748546095344, "learning_rate": 3.3903074503571414e-05, "loss": 0.8332, "step": 660 }, { "epoch": 0.2634253262927169, "grad_norm": 0.39322680162826995, "learning_rate": 3.3884872733463605e-05, "loss": 0.8522, "step": 661 }, { "epoch": 0.26382385174853046, "grad_norm": 0.4426408711257021, "learning_rate": 3.386664873598206e-05, "loss": 0.8439, "step": 662 }, { "epoch": 0.26422237720434394, "grad_norm": 0.40481569528280453, "learning_rate": 3.384840254030039e-05, "loss": 0.8463, "step": 663 }, { "epoch": 0.2646209026601574, "grad_norm": 0.486897366169285, "learning_rate": 3.3830134175627694e-05, "loss": 0.8383, "step": 664 }, { "epoch": 0.2650194281159709, "grad_norm": 0.4124318747978423, "learning_rate": 3.3811843671208604e-05, "loss": 0.8341, "step": 665 }, { "epoch": 0.2654179535717844, "grad_norm": 0.4480853051751989, "learning_rate": 3.379353105632318e-05, "loss": 0.8719, "step": 666 }, { "epoch": 0.26581647902759786, "grad_norm": 0.4075223126165696, "learning_rate": 3.3775196360286864e-05, "loss": 0.825, "step": 667 }, { "epoch": 0.2662150044834114, "grad_norm": 0.4598432178350243, "learning_rate": 3.375683961245047e-05, "loss": 0.8459, "step": 668 }, { "epoch": 0.2666135299392249, "grad_norm": 0.4747860282082611, "learning_rate": 3.3738460842200095e-05, "loss": 0.8448, "step": 669 }, { "epoch": 0.26701205539503836, "grad_norm": 0.42550536631714303, "learning_rate": 3.37200600789571e-05, "loss": 0.8482, "step": 670 }, { "epoch": 0.26741058085085184, "grad_norm": 0.5014696923841511, "learning_rate": 3.3701637352178035e-05, "loss": 0.839, "step": 671 }, { "epoch": 0.2678091063066653, "grad_norm": 0.44071644150719574, "learning_rate": 3.368319269135464e-05, "loss": 0.8499, "step": 672 }, { "epoch": 0.2682076317624788, "grad_norm": 0.45694183948733363, "learning_rate": 3.366472612601374e-05, "loss": 0.8495, "step": 673 }, { "epoch": 0.26860615721829234, "grad_norm": 0.45776428701146005, "learning_rate": 3.364623768571725e-05, "loss": 0.8683, "step": 674 }, { "epoch": 0.2690046826741058, "grad_norm": 0.4300670256635499, "learning_rate": 3.3627727400062074e-05, "loss": 0.8409, "step": 675 }, { "epoch": 0.2694032081299193, "grad_norm": 0.4522484813223993, "learning_rate": 3.360919529868012e-05, "loss": 0.8549, "step": 676 }, { "epoch": 0.2698017335857328, "grad_norm": 0.46483110883882417, "learning_rate": 3.3590641411238184e-05, "loss": 0.8316, "step": 677 }, { "epoch": 0.27020025904154626, "grad_norm": 0.46516087115887955, "learning_rate": 3.3572065767437974e-05, "loss": 0.847, "step": 678 }, { "epoch": 0.27059878449735975, "grad_norm": 0.4870114489474851, "learning_rate": 3.355346839701601e-05, "loss": 0.866, "step": 679 }, { "epoch": 0.2709973099531733, "grad_norm": 0.4112151077893339, "learning_rate": 3.353484932974357e-05, "loss": 0.8747, "step": 680 }, { "epoch": 0.27139583540898676, "grad_norm": 0.39988331169551145, "learning_rate": 3.35162085954267e-05, "loss": 0.8491, "step": 681 }, { "epoch": 0.27179436086480024, "grad_norm": 0.4580861040010356, "learning_rate": 3.3497546223906114e-05, "loss": 0.8373, "step": 682 }, { "epoch": 0.2721928863206137, "grad_norm": 0.4676988585541286, "learning_rate": 3.347886224505718e-05, "loss": 0.8562, "step": 683 }, { "epoch": 0.2725914117764272, "grad_norm": 0.3815018026041965, "learning_rate": 3.346015668878982e-05, "loss": 0.8865, "step": 684 }, { "epoch": 0.2729899372322407, "grad_norm": 0.3853282548165928, "learning_rate": 3.3441429585048544e-05, "loss": 0.8451, "step": 685 }, { "epoch": 0.2733884626880542, "grad_norm": 0.46857379361810175, "learning_rate": 3.342268096381233e-05, "loss": 0.8343, "step": 686 }, { "epoch": 0.2737869881438677, "grad_norm": 0.44893908766670865, "learning_rate": 3.340391085509458e-05, "loss": 0.8425, "step": 687 }, { "epoch": 0.2741855135996812, "grad_norm": 0.4623804261603112, "learning_rate": 3.338511928894315e-05, "loss": 0.8752, "step": 688 }, { "epoch": 0.27458403905549467, "grad_norm": 0.40030690241398437, "learning_rate": 3.3366306295440195e-05, "loss": 0.8854, "step": 689 }, { "epoch": 0.27498256451130815, "grad_norm": 0.41617160670796793, "learning_rate": 3.3347471904702196e-05, "loss": 0.8976, "step": 690 }, { "epoch": 0.27538108996712163, "grad_norm": 0.4056939768327828, "learning_rate": 3.3328616146879886e-05, "loss": 0.872, "step": 691 }, { "epoch": 0.27577961542293516, "grad_norm": 0.37847852674838545, "learning_rate": 3.33097390521582e-05, "loss": 0.8155, "step": 692 }, { "epoch": 0.27617814087874865, "grad_norm": 0.35872927161364443, "learning_rate": 3.329084065075622e-05, "loss": 0.8273, "step": 693 }, { "epoch": 0.2765766663345621, "grad_norm": 0.39096155431724333, "learning_rate": 3.327192097292715e-05, "loss": 0.8581, "step": 694 }, { "epoch": 0.2769751917903756, "grad_norm": 0.3861177159461641, "learning_rate": 3.325298004895826e-05, "loss": 0.8132, "step": 695 }, { "epoch": 0.2773737172461891, "grad_norm": 0.4171747417597138, "learning_rate": 3.323401790917082e-05, "loss": 0.8347, "step": 696 }, { "epoch": 0.27777224270200257, "grad_norm": 0.364670807824471, "learning_rate": 3.321503458392005e-05, "loss": 0.8415, "step": 697 }, { "epoch": 0.2781707681578161, "grad_norm": 0.331401074927844, "learning_rate": 3.3196030103595105e-05, "loss": 0.8459, "step": 698 }, { "epoch": 0.2785692936136296, "grad_norm": 0.43255738046602604, "learning_rate": 3.317700449861901e-05, "loss": 0.8335, "step": 699 }, { "epoch": 0.27896781906944307, "grad_norm": 0.33456506773762923, "learning_rate": 3.315795779944858e-05, "loss": 0.8647, "step": 700 }, { "epoch": 0.27936634452525655, "grad_norm": 0.3715707582620995, "learning_rate": 3.313889003657443e-05, "loss": 0.8547, "step": 701 }, { "epoch": 0.27976486998107003, "grad_norm": 0.3331498560093925, "learning_rate": 3.311980124052087e-05, "loss": 0.8447, "step": 702 }, { "epoch": 0.2801633954368835, "grad_norm": 0.4038630202134111, "learning_rate": 3.3100691441845896e-05, "loss": 0.8247, "step": 703 }, { "epoch": 0.28056192089269705, "grad_norm": 0.365237203718338, "learning_rate": 3.308156067114111e-05, "loss": 0.8737, "step": 704 }, { "epoch": 0.28096044634851053, "grad_norm": 0.4002592791047349, "learning_rate": 3.3062408959031715e-05, "loss": 0.8478, "step": 705 }, { "epoch": 0.281358971804324, "grad_norm": 0.34357520687563103, "learning_rate": 3.304323633617641e-05, "loss": 0.8233, "step": 706 }, { "epoch": 0.2817574972601375, "grad_norm": 0.3505454925796206, "learning_rate": 3.3024042833267357e-05, "loss": 0.8281, "step": 707 }, { "epoch": 0.28215602271595097, "grad_norm": 0.35854787844493347, "learning_rate": 3.3004828481030197e-05, "loss": 0.8314, "step": 708 }, { "epoch": 0.28255454817176445, "grad_norm": 0.3633810116569549, "learning_rate": 3.2985593310223905e-05, "loss": 0.8337, "step": 709 }, { "epoch": 0.282953073627578, "grad_norm": 0.40905086354028014, "learning_rate": 3.296633735164078e-05, "loss": 0.8278, "step": 710 }, { "epoch": 0.28335159908339147, "grad_norm": 0.39198864644450826, "learning_rate": 3.294706063610642e-05, "loss": 0.8495, "step": 711 }, { "epoch": 0.28375012453920495, "grad_norm": 0.39676678952183586, "learning_rate": 3.292776319447965e-05, "loss": 0.841, "step": 712 }, { "epoch": 0.28414864999501843, "grad_norm": 0.4904457094152149, "learning_rate": 3.290844505765246e-05, "loss": 0.8538, "step": 713 }, { "epoch": 0.2845471754508319, "grad_norm": 0.38619534462184524, "learning_rate": 3.288910625654997e-05, "loss": 0.831, "step": 714 }, { "epoch": 0.2849457009066454, "grad_norm": 0.3965911327088796, "learning_rate": 3.28697468221304e-05, "loss": 0.855, "step": 715 }, { "epoch": 0.2853442263624589, "grad_norm": 0.4104504182776709, "learning_rate": 3.2850366785384975e-05, "loss": 0.8312, "step": 716 }, { "epoch": 0.2857427518182724, "grad_norm": 0.39320803615560024, "learning_rate": 3.2830966177337926e-05, "loss": 0.8256, "step": 717 }, { "epoch": 0.2861412772740859, "grad_norm": 0.36766055059184494, "learning_rate": 3.281154502904639e-05, "loss": 0.8612, "step": 718 }, { "epoch": 0.2865398027298994, "grad_norm": 0.3523821293496536, "learning_rate": 3.279210337160041e-05, "loss": 0.8546, "step": 719 }, { "epoch": 0.28693832818571285, "grad_norm": 0.4303479446087632, "learning_rate": 3.277264123612283e-05, "loss": 0.843, "step": 720 }, { "epoch": 0.28733685364152634, "grad_norm": 0.37256602383763016, "learning_rate": 3.275315865376932e-05, "loss": 0.8525, "step": 721 }, { "epoch": 0.2877353790973398, "grad_norm": 0.372312406331151, "learning_rate": 3.273365565572824e-05, "loss": 0.8718, "step": 722 }, { "epoch": 0.28813390455315335, "grad_norm": 0.3748404787253373, "learning_rate": 3.271413227322064e-05, "loss": 0.8284, "step": 723 }, { "epoch": 0.28853243000896683, "grad_norm": 0.40949697147874353, "learning_rate": 3.269458853750023e-05, "loss": 0.8342, "step": 724 }, { "epoch": 0.2889309554647803, "grad_norm": 0.35759282756001504, "learning_rate": 3.267502447985328e-05, "loss": 0.8376, "step": 725 }, { "epoch": 0.2893294809205938, "grad_norm": 0.424890270877448, "learning_rate": 3.2655440131598585e-05, "loss": 0.8144, "step": 726 }, { "epoch": 0.2897280063764073, "grad_norm": 0.37228222071530115, "learning_rate": 3.263583552408744e-05, "loss": 0.8203, "step": 727 }, { "epoch": 0.29012653183222076, "grad_norm": 0.36804439864776206, "learning_rate": 3.261621068870355e-05, "loss": 0.8436, "step": 728 }, { "epoch": 0.2905250572880343, "grad_norm": 0.4010864307131854, "learning_rate": 3.2596565656863036e-05, "loss": 0.8211, "step": 729 }, { "epoch": 0.2909235827438478, "grad_norm": 0.43321148633091444, "learning_rate": 3.257690046001431e-05, "loss": 0.8659, "step": 730 }, { "epoch": 0.29132210819966126, "grad_norm": 0.37678425829862483, "learning_rate": 3.255721512963811e-05, "loss": 0.8549, "step": 731 }, { "epoch": 0.29172063365547474, "grad_norm": 0.38473774610717565, "learning_rate": 3.253750969724735e-05, "loss": 0.8584, "step": 732 }, { "epoch": 0.2921191591112882, "grad_norm": 0.3274732323738536, "learning_rate": 3.251778419438716e-05, "loss": 0.8197, "step": 733 }, { "epoch": 0.2925176845671017, "grad_norm": 0.37385182013341806, "learning_rate": 3.2498038652634797e-05, "loss": 0.8485, "step": 734 }, { "epoch": 0.29291621002291524, "grad_norm": 0.37571422954043315, "learning_rate": 3.2478273103599587e-05, "loss": 0.8131, "step": 735 }, { "epoch": 0.2933147354787287, "grad_norm": 1.0205773925944017, "learning_rate": 3.24584875789229e-05, "loss": 0.8122, "step": 736 }, { "epoch": 0.2937132609345422, "grad_norm": 0.397474423244844, "learning_rate": 3.243868211027807e-05, "loss": 0.8575, "step": 737 }, { "epoch": 0.2941117863903557, "grad_norm": 0.35542654634964194, "learning_rate": 3.241885672937034e-05, "loss": 0.8459, "step": 738 }, { "epoch": 0.29451031184616916, "grad_norm": 0.7079812695011942, "learning_rate": 3.239901146793688e-05, "loss": 0.8235, "step": 739 }, { "epoch": 0.29490883730198264, "grad_norm": 0.40472908559410964, "learning_rate": 3.237914635774664e-05, "loss": 0.8358, "step": 740 }, { "epoch": 0.2953073627577962, "grad_norm": 0.6704919581462614, "learning_rate": 3.235926143060036e-05, "loss": 0.881, "step": 741 }, { "epoch": 0.29570588821360966, "grad_norm": 0.373533664396295, "learning_rate": 3.23393567183305e-05, "loss": 0.853, "step": 742 }, { "epoch": 0.29610441366942314, "grad_norm": 0.4047009515080516, "learning_rate": 3.231943225280121e-05, "loss": 0.8569, "step": 743 }, { "epoch": 0.2965029391252366, "grad_norm": 0.3877536209778869, "learning_rate": 3.229948806590824e-05, "loss": 0.835, "step": 744 }, { "epoch": 0.2969014645810501, "grad_norm": 0.4714038839534881, "learning_rate": 3.227952418957892e-05, "loss": 0.868, "step": 745 }, { "epoch": 0.2972999900368636, "grad_norm": 0.4463329373269963, "learning_rate": 3.225954065577209e-05, "loss": 0.848, "step": 746 }, { "epoch": 0.2976985154926771, "grad_norm": 0.42587530691745, "learning_rate": 3.223953749647807e-05, "loss": 0.8607, "step": 747 }, { "epoch": 0.2980970409484906, "grad_norm": 0.4379931392773523, "learning_rate": 3.221951474371861e-05, "loss": 0.813, "step": 748 }, { "epoch": 0.2984955664043041, "grad_norm": 0.38309480692550185, "learning_rate": 3.2199472429546785e-05, "loss": 0.8474, "step": 749 }, { "epoch": 0.29889409186011756, "grad_norm": 0.3616798063850079, "learning_rate": 3.2179410586047025e-05, "loss": 0.8154, "step": 750 }, { "epoch": 0.29929261731593104, "grad_norm": 0.3747541200969163, "learning_rate": 3.215932924533501e-05, "loss": 0.8378, "step": 751 }, { "epoch": 0.2996911427717445, "grad_norm": 0.38031077846694633, "learning_rate": 3.213922843955762e-05, "loss": 0.8543, "step": 752 }, { "epoch": 0.30008966822755806, "grad_norm": 0.41068418371221344, "learning_rate": 3.21191082008929e-05, "loss": 0.8392, "step": 753 }, { "epoch": 0.30048819368337154, "grad_norm": 0.3644597909816924, "learning_rate": 3.2098968561550024e-05, "loss": 0.8061, "step": 754 }, { "epoch": 0.300886719139185, "grad_norm": 0.37311229876996665, "learning_rate": 3.2078809553769195e-05, "loss": 0.8693, "step": 755 }, { "epoch": 0.3012852445949985, "grad_norm": 0.45016158998524075, "learning_rate": 3.205863120982164e-05, "loss": 0.8602, "step": 756 }, { "epoch": 0.301683770050812, "grad_norm": 0.42629280896654315, "learning_rate": 3.203843356200952e-05, "loss": 0.8532, "step": 757 }, { "epoch": 0.30208229550662546, "grad_norm": 0.4110371155650319, "learning_rate": 3.201821664266595e-05, "loss": 0.8451, "step": 758 }, { "epoch": 0.302480820962439, "grad_norm": 0.4192137078636866, "learning_rate": 3.199798048415481e-05, "loss": 0.8436, "step": 759 }, { "epoch": 0.3028793464182525, "grad_norm": 0.4446866796453996, "learning_rate": 3.197772511887086e-05, "loss": 0.8235, "step": 760 }, { "epoch": 0.30327787187406596, "grad_norm": 0.433556905913176, "learning_rate": 3.195745057923957e-05, "loss": 0.8603, "step": 761 }, { "epoch": 0.30367639732987944, "grad_norm": 0.4114711662961495, "learning_rate": 3.193715689771709e-05, "loss": 0.838, "step": 762 }, { "epoch": 0.3040749227856929, "grad_norm": 0.3926214986996156, "learning_rate": 3.191684410679025e-05, "loss": 0.8502, "step": 763 }, { "epoch": 0.3044734482415064, "grad_norm": 0.4139928341021709, "learning_rate": 3.189651223897644e-05, "loss": 0.8385, "step": 764 }, { "epoch": 0.30487197369731994, "grad_norm": 0.4129548938591373, "learning_rate": 3.1876161326823615e-05, "loss": 0.8791, "step": 765 }, { "epoch": 0.3052704991531334, "grad_norm": 0.3955272894598311, "learning_rate": 3.185579140291019e-05, "loss": 0.8384, "step": 766 }, { "epoch": 0.3056690246089469, "grad_norm": 0.3585005878079346, "learning_rate": 3.183540249984504e-05, "loss": 0.8132, "step": 767 }, { "epoch": 0.3060675500647604, "grad_norm": 0.4212205077030527, "learning_rate": 3.18149946502674e-05, "loss": 0.8308, "step": 768 }, { "epoch": 0.30646607552057387, "grad_norm": 0.3638728218380253, "learning_rate": 3.179456788684685e-05, "loss": 0.8097, "step": 769 }, { "epoch": 0.30686460097638735, "grad_norm": 0.4024379131636804, "learning_rate": 3.1774122242283236e-05, "loss": 0.8401, "step": 770 }, { "epoch": 0.3072631264322009, "grad_norm": 0.4132201236498637, "learning_rate": 3.175365774930665e-05, "loss": 0.8111, "step": 771 }, { "epoch": 0.30766165188801436, "grad_norm": 0.3525657580163014, "learning_rate": 3.1733174440677346e-05, "loss": 0.8201, "step": 772 }, { "epoch": 0.30806017734382785, "grad_norm": 0.35083612349906135, "learning_rate": 3.171267234918568e-05, "loss": 0.815, "step": 773 }, { "epoch": 0.3084587027996413, "grad_norm": 0.4002385012230293, "learning_rate": 3.169215150765211e-05, "loss": 0.8168, "step": 774 }, { "epoch": 0.3088572282554548, "grad_norm": 0.3629878196057507, "learning_rate": 3.1671611948927074e-05, "loss": 0.8367, "step": 775 }, { "epoch": 0.3092557537112683, "grad_norm": 0.34583616562695413, "learning_rate": 3.165105370589102e-05, "loss": 0.8253, "step": 776 }, { "epoch": 0.3096542791670818, "grad_norm": 0.3607827655628309, "learning_rate": 3.1630476811454246e-05, "loss": 0.8284, "step": 777 }, { "epoch": 0.3100528046228953, "grad_norm": 0.37546990727594654, "learning_rate": 3.160988129855697e-05, "loss": 0.8376, "step": 778 }, { "epoch": 0.3104513300787088, "grad_norm": 0.39969610145426393, "learning_rate": 3.158926720016917e-05, "loss": 0.8516, "step": 779 }, { "epoch": 0.31084985553452227, "grad_norm": 0.36953469600153793, "learning_rate": 3.156863454929059e-05, "loss": 0.8236, "step": 780 }, { "epoch": 0.31124838099033575, "grad_norm": 0.33928479120444516, "learning_rate": 3.154798337895067e-05, "loss": 0.8443, "step": 781 }, { "epoch": 0.31164690644614923, "grad_norm": 0.3966330597527675, "learning_rate": 3.152731372220852e-05, "loss": 0.8188, "step": 782 }, { "epoch": 0.3120454319019627, "grad_norm": 0.3946127272938953, "learning_rate": 3.1506625612152814e-05, "loss": 0.832, "step": 783 }, { "epoch": 0.31244395735777625, "grad_norm": 0.3785322567375632, "learning_rate": 3.148591908190178e-05, "loss": 0.8393, "step": 784 }, { "epoch": 0.31284248281358973, "grad_norm": 0.36331251784056434, "learning_rate": 3.1465194164603135e-05, "loss": 0.8403, "step": 785 }, { "epoch": 0.3132410082694032, "grad_norm": 0.35684726071521566, "learning_rate": 3.1444450893434025e-05, "loss": 0.8464, "step": 786 }, { "epoch": 0.3136395337252167, "grad_norm": 0.33346839612618157, "learning_rate": 3.142368930160098e-05, "loss": 0.8607, "step": 787 }, { "epoch": 0.31403805918103017, "grad_norm": 0.34733144268906585, "learning_rate": 3.140290942233985e-05, "loss": 0.858, "step": 788 }, { "epoch": 0.31443658463684365, "grad_norm": 0.3523769266485713, "learning_rate": 3.138211128891578e-05, "loss": 0.8245, "step": 789 }, { "epoch": 0.3148351100926572, "grad_norm": 0.3491121768861967, "learning_rate": 3.136129493462312e-05, "loss": 0.8394, "step": 790 }, { "epoch": 0.31523363554847067, "grad_norm": 0.3878058197741651, "learning_rate": 3.134046039278539e-05, "loss": 0.8406, "step": 791 }, { "epoch": 0.31563216100428415, "grad_norm": 0.3331713976353916, "learning_rate": 3.131960769675524e-05, "loss": 0.8205, "step": 792 }, { "epoch": 0.31603068646009763, "grad_norm": 0.3902176893077025, "learning_rate": 3.1298736879914364e-05, "loss": 0.8634, "step": 793 }, { "epoch": 0.3164292119159111, "grad_norm": 0.39518447785038, "learning_rate": 3.127784797567347e-05, "loss": 0.8298, "step": 794 }, { "epoch": 0.3168277373717246, "grad_norm": 0.3422487336442997, "learning_rate": 3.125694101747222e-05, "loss": 0.8613, "step": 795 }, { "epoch": 0.31722626282753813, "grad_norm": 0.33332846452402065, "learning_rate": 3.123601603877918e-05, "loss": 0.8502, "step": 796 }, { "epoch": 0.3176247882833516, "grad_norm": 0.6423101526850392, "learning_rate": 3.121507307309178e-05, "loss": 0.8338, "step": 797 }, { "epoch": 0.3180233137391651, "grad_norm": 0.38531993142674054, "learning_rate": 3.11941121539362e-05, "loss": 0.7963, "step": 798 }, { "epoch": 0.3184218391949786, "grad_norm": 0.3592316503041697, "learning_rate": 3.1173133314867414e-05, "loss": 0.8411, "step": 799 }, { "epoch": 0.31882036465079205, "grad_norm": 0.3598280004430287, "learning_rate": 3.115213658946904e-05, "loss": 0.8336, "step": 800 }, { "epoch": 0.31921889010660554, "grad_norm": 0.3496111681067253, "learning_rate": 3.113112201135335e-05, "loss": 0.8574, "step": 801 }, { "epoch": 0.31961741556241907, "grad_norm": 0.3664242703958735, "learning_rate": 3.11100896141612e-05, "loss": 0.8436, "step": 802 }, { "epoch": 0.32001594101823255, "grad_norm": 0.32787991821140705, "learning_rate": 3.108903943156194e-05, "loss": 0.8489, "step": 803 }, { "epoch": 0.32041446647404603, "grad_norm": 0.40557517482435224, "learning_rate": 3.106797149725344e-05, "loss": 0.8237, "step": 804 }, { "epoch": 0.3208129919298595, "grad_norm": 0.37518817153121636, "learning_rate": 3.1046885844961946e-05, "loss": 0.8274, "step": 805 }, { "epoch": 0.321211517385673, "grad_norm": 0.37714764259452016, "learning_rate": 3.102578250844209e-05, "loss": 0.8331, "step": 806 }, { "epoch": 0.3216100428414865, "grad_norm": 0.37798047544093105, "learning_rate": 3.10046615214768e-05, "loss": 0.8502, "step": 807 }, { "epoch": 0.3220085682973, "grad_norm": 0.4109920014418336, "learning_rate": 3.098352291787728e-05, "loss": 0.8227, "step": 808 }, { "epoch": 0.3224070937531135, "grad_norm": 0.4499775221189975, "learning_rate": 3.09623667314829e-05, "loss": 0.8247, "step": 809 }, { "epoch": 0.322805619208927, "grad_norm": 0.331922156881542, "learning_rate": 3.0941192996161215e-05, "loss": 0.7928, "step": 810 }, { "epoch": 0.32320414466474046, "grad_norm": 0.34248930965498, "learning_rate": 3.092000174580785e-05, "loss": 0.8432, "step": 811 }, { "epoch": 0.32360267012055394, "grad_norm": 0.35843509172736904, "learning_rate": 3.089879301434648e-05, "loss": 0.8477, "step": 812 }, { "epoch": 0.3240011955763674, "grad_norm": 0.3683897489622322, "learning_rate": 3.0877566835728755e-05, "loss": 0.8091, "step": 813 }, { "epoch": 0.32439972103218095, "grad_norm": 0.5313658405862416, "learning_rate": 3.0856323243934255e-05, "loss": 0.8279, "step": 814 }, { "epoch": 0.32479824648799444, "grad_norm": 0.37323472384352163, "learning_rate": 3.083506227297045e-05, "loss": 0.8326, "step": 815 }, { "epoch": 0.3251967719438079, "grad_norm": 0.39228250684825317, "learning_rate": 3.0813783956872615e-05, "loss": 0.8294, "step": 816 }, { "epoch": 0.3255952973996214, "grad_norm": 0.3652945541655549, "learning_rate": 3.07924883297038e-05, "loss": 0.846, "step": 817 }, { "epoch": 0.3259938228554349, "grad_norm": 0.3659044128890069, "learning_rate": 3.0771175425554766e-05, "loss": 0.8204, "step": 818 }, { "epoch": 0.32639234831124836, "grad_norm": 0.4707331446693342, "learning_rate": 3.074984527854392e-05, "loss": 0.8163, "step": 819 }, { "epoch": 0.3267908737670619, "grad_norm": 0.3606528922605574, "learning_rate": 3.072849792281731e-05, "loss": 0.8334, "step": 820 }, { "epoch": 0.3271893992228754, "grad_norm": 0.3770070622615337, "learning_rate": 3.0707133392548474e-05, "loss": 0.8224, "step": 821 }, { "epoch": 0.32758792467868886, "grad_norm": 0.359650139273174, "learning_rate": 3.068575172193849e-05, "loss": 0.8534, "step": 822 }, { "epoch": 0.32798645013450234, "grad_norm": 0.3456572438444792, "learning_rate": 3.066435294521584e-05, "loss": 0.889, "step": 823 }, { "epoch": 0.3283849755903158, "grad_norm": 0.3918222247018766, "learning_rate": 3.064293709663645e-05, "loss": 0.7898, "step": 824 }, { "epoch": 0.3287835010461293, "grad_norm": 0.4247237481434523, "learning_rate": 3.0621504210483495e-05, "loss": 0.8535, "step": 825 }, { "epoch": 0.32918202650194284, "grad_norm": 0.36874426839954455, "learning_rate": 3.0600054321067486e-05, "loss": 0.8336, "step": 826 }, { "epoch": 0.3295805519577563, "grad_norm": 0.4207632539441216, "learning_rate": 3.057858746272611e-05, "loss": 0.841, "step": 827 }, { "epoch": 0.3299790774135698, "grad_norm": 0.38496904071215293, "learning_rate": 3.055710366982427e-05, "loss": 0.8195, "step": 828 }, { "epoch": 0.3303776028693833, "grad_norm": 0.4663868777863652, "learning_rate": 3.053560297675392e-05, "loss": 0.8419, "step": 829 }, { "epoch": 0.33077612832519676, "grad_norm": 0.5264881698443798, "learning_rate": 3.0514085417934112e-05, "loss": 0.8017, "step": 830 }, { "epoch": 0.33117465378101024, "grad_norm": 0.4647249062040843, "learning_rate": 3.0492551027810876e-05, "loss": 0.8468, "step": 831 }, { "epoch": 0.3315731792368238, "grad_norm": 0.305099119380529, "learning_rate": 3.04709998408572e-05, "loss": 0.7996, "step": 832 }, { "epoch": 0.33197170469263726, "grad_norm": 0.46977408947791516, "learning_rate": 3.0449431891572936e-05, "loss": 0.8474, "step": 833 }, { "epoch": 0.33237023014845074, "grad_norm": 0.44745094401575514, "learning_rate": 3.0427847214484804e-05, "loss": 0.8349, "step": 834 }, { "epoch": 0.3327687556042642, "grad_norm": 0.3543202737692515, "learning_rate": 3.0406245844146273e-05, "loss": 0.8253, "step": 835 }, { "epoch": 0.3331672810600777, "grad_norm": 0.3933697240001331, "learning_rate": 3.0384627815137553e-05, "loss": 0.8125, "step": 836 }, { "epoch": 0.3335658065158912, "grad_norm": 0.5148260560348337, "learning_rate": 3.0362993162065516e-05, "loss": 0.8627, "step": 837 }, { "epoch": 0.3339643319717047, "grad_norm": 0.48857380870627215, "learning_rate": 3.034134191956364e-05, "loss": 0.8236, "step": 838 }, { "epoch": 0.3343628574275182, "grad_norm": 0.4056146666480351, "learning_rate": 3.0319674122291977e-05, "loss": 0.8302, "step": 839 }, { "epoch": 0.3347613828833317, "grad_norm": 0.3904977617394034, "learning_rate": 3.0297989804937057e-05, "loss": 0.8167, "step": 840 }, { "epoch": 0.33515990833914516, "grad_norm": 0.5044251985190126, "learning_rate": 3.027628900221187e-05, "loss": 0.8233, "step": 841 }, { "epoch": 0.33555843379495864, "grad_norm": 0.4250841928547596, "learning_rate": 3.025457174885581e-05, "loss": 0.8281, "step": 842 }, { "epoch": 0.3359569592507721, "grad_norm": 0.3823907908471619, "learning_rate": 3.0232838079634575e-05, "loss": 0.8242, "step": 843 }, { "epoch": 0.3363554847065856, "grad_norm": 0.42945934078552406, "learning_rate": 3.0211088029340154e-05, "loss": 0.8354, "step": 844 }, { "epoch": 0.33675401016239914, "grad_norm": 0.42902292521578395, "learning_rate": 3.018932163279078e-05, "loss": 0.833, "step": 845 }, { "epoch": 0.3371525356182126, "grad_norm": 0.3937451062114422, "learning_rate": 3.016753892483083e-05, "loss": 0.7891, "step": 846 }, { "epoch": 0.3375510610740261, "grad_norm": 0.3540399272237491, "learning_rate": 3.0145739940330786e-05, "loss": 0.8573, "step": 847 }, { "epoch": 0.3379495865298396, "grad_norm": 0.4084630243877346, "learning_rate": 3.0123924714187214e-05, "loss": 0.8234, "step": 848 }, { "epoch": 0.33834811198565307, "grad_norm": 0.42274333879010845, "learning_rate": 3.0102093281322666e-05, "loss": 0.8212, "step": 849 }, { "epoch": 0.33874663744146655, "grad_norm": 0.3321533474722135, "learning_rate": 3.008024567668563e-05, "loss": 0.8173, "step": 850 }, { "epoch": 0.3391451628972801, "grad_norm": 0.3692564529574208, "learning_rate": 3.0058381935250495e-05, "loss": 0.8557, "step": 851 }, { "epoch": 0.33954368835309356, "grad_norm": 0.39610202569549047, "learning_rate": 3.0036502092017473e-05, "loss": 0.8654, "step": 852 }, { "epoch": 0.33994221380890705, "grad_norm": 0.3661238023568551, "learning_rate": 3.0014606182012566e-05, "loss": 0.8727, "step": 853 }, { "epoch": 0.3403407392647205, "grad_norm": 0.3872040100330332, "learning_rate": 2.9992694240287474e-05, "loss": 0.8291, "step": 854 }, { "epoch": 0.340739264720534, "grad_norm": 0.3974606504195108, "learning_rate": 2.9970766301919583e-05, "loss": 0.8679, "step": 855 }, { "epoch": 0.3411377901763475, "grad_norm": 0.3938746707369231, "learning_rate": 2.994882240201188e-05, "loss": 0.8433, "step": 856 }, { "epoch": 0.341536315632161, "grad_norm": 0.3691659772037152, "learning_rate": 2.99268625756929e-05, "loss": 0.8393, "step": 857 }, { "epoch": 0.3419348410879745, "grad_norm": 0.3780103920503278, "learning_rate": 2.990488685811667e-05, "loss": 0.8346, "step": 858 }, { "epoch": 0.342333366543788, "grad_norm": 0.4073582614267046, "learning_rate": 2.9882895284462664e-05, "loss": 0.8476, "step": 859 }, { "epoch": 0.34273189199960147, "grad_norm": 0.34365964699391127, "learning_rate": 2.9860887889935744e-05, "loss": 0.8282, "step": 860 }, { "epoch": 0.34313041745541495, "grad_norm": 0.40120857716998304, "learning_rate": 2.983886470976608e-05, "loss": 0.8275, "step": 861 }, { "epoch": 0.34352894291122843, "grad_norm": 0.3959132704688456, "learning_rate": 2.9816825779209133e-05, "loss": 0.8251, "step": 862 }, { "epoch": 0.34392746836704197, "grad_norm": 0.4334298136162478, "learning_rate": 2.9794771133545565e-05, "loss": 0.822, "step": 863 }, { "epoch": 0.34432599382285545, "grad_norm": 0.3870945760786885, "learning_rate": 2.977270080808119e-05, "loss": 0.8251, "step": 864 }, { "epoch": 0.34472451927866893, "grad_norm": 0.37106301614057785, "learning_rate": 2.975061483814694e-05, "loss": 0.8545, "step": 865 }, { "epoch": 0.3451230447344824, "grad_norm": 0.38427213586073594, "learning_rate": 2.9728513259098784e-05, "loss": 0.8161, "step": 866 }, { "epoch": 0.3455215701902959, "grad_norm": 0.3916565010304088, "learning_rate": 2.9706396106317675e-05, "loss": 0.8419, "step": 867 }, { "epoch": 0.34592009564610937, "grad_norm": 0.3709069418845533, "learning_rate": 2.96842634152095e-05, "loss": 0.846, "step": 868 }, { "epoch": 0.3463186211019229, "grad_norm": 0.3703215359984664, "learning_rate": 2.9662115221205015e-05, "loss": 0.8222, "step": 869 }, { "epoch": 0.3467171465577364, "grad_norm": 0.3464063836842463, "learning_rate": 2.9639951559759802e-05, "loss": 0.8036, "step": 870 }, { "epoch": 0.34711567201354987, "grad_norm": 0.35048924216820243, "learning_rate": 2.9617772466354192e-05, "loss": 0.818, "step": 871 }, { "epoch": 0.34751419746936335, "grad_norm": 0.374548515628163, "learning_rate": 2.9595577976493238e-05, "loss": 0.8199, "step": 872 }, { "epoch": 0.34791272292517683, "grad_norm": 0.37643952630682037, "learning_rate": 2.9573368125706624e-05, "loss": 0.825, "step": 873 }, { "epoch": 0.3483112483809903, "grad_norm": 0.3873605831737666, "learning_rate": 2.9551142949548634e-05, "loss": 0.8183, "step": 874 }, { "epoch": 0.34870977383680385, "grad_norm": 0.3639676705380599, "learning_rate": 2.9528902483598076e-05, "loss": 0.8536, "step": 875 }, { "epoch": 0.34910829929261733, "grad_norm": 0.3239254348822666, "learning_rate": 2.950664676345824e-05, "loss": 0.7855, "step": 876 }, { "epoch": 0.3495068247484308, "grad_norm": 0.4333203945657134, "learning_rate": 2.9484375824756845e-05, "loss": 0.8377, "step": 877 }, { "epoch": 0.3499053502042443, "grad_norm": 0.366000478962248, "learning_rate": 2.946208970314595e-05, "loss": 0.841, "step": 878 }, { "epoch": 0.3503038756600578, "grad_norm": 0.36232443933919917, "learning_rate": 2.943978843430194e-05, "loss": 0.8415, "step": 879 }, { "epoch": 0.35070240111587125, "grad_norm": 0.3632587538915808, "learning_rate": 2.9417472053925435e-05, "loss": 0.833, "step": 880 }, { "epoch": 0.3511009265716848, "grad_norm": 0.34528067844688565, "learning_rate": 2.939514059774126e-05, "loss": 0.8089, "step": 881 }, { "epoch": 0.35149945202749827, "grad_norm": 0.3186838935536136, "learning_rate": 2.9372794101498353e-05, "loss": 0.8112, "step": 882 }, { "epoch": 0.35189797748331175, "grad_norm": 0.33496956021034613, "learning_rate": 2.935043260096975e-05, "loss": 0.8421, "step": 883 }, { "epoch": 0.35229650293912523, "grad_norm": 0.33411225546854484, "learning_rate": 2.932805613195249e-05, "loss": 0.8113, "step": 884 }, { "epoch": 0.3526950283949387, "grad_norm": 0.32478642663480967, "learning_rate": 2.9305664730267586e-05, "loss": 0.8046, "step": 885 }, { "epoch": 0.3530935538507522, "grad_norm": 0.3631121635365864, "learning_rate": 2.9283258431759954e-05, "loss": 0.8173, "step": 886 }, { "epoch": 0.35349207930656573, "grad_norm": 0.3429622024570721, "learning_rate": 2.926083727229835e-05, "loss": 0.8583, "step": 887 }, { "epoch": 0.3538906047623792, "grad_norm": 0.345044521347691, "learning_rate": 2.923840128777532e-05, "loss": 0.813, "step": 888 }, { "epoch": 0.3542891302181927, "grad_norm": 0.3694760550020032, "learning_rate": 2.9215950514107155e-05, "loss": 0.8315, "step": 889 }, { "epoch": 0.3546876556740062, "grad_norm": 0.34900971672785386, "learning_rate": 2.9193484987233804e-05, "loss": 0.8251, "step": 890 }, { "epoch": 0.35508618112981966, "grad_norm": 0.36620900329612915, "learning_rate": 2.917100474311885e-05, "loss": 0.8243, "step": 891 }, { "epoch": 0.35548470658563314, "grad_norm": 0.3732972879676541, "learning_rate": 2.9148509817749424e-05, "loss": 0.8263, "step": 892 }, { "epoch": 0.3558832320414467, "grad_norm": 0.3754066448612361, "learning_rate": 2.9126000247136162e-05, "loss": 0.8549, "step": 893 }, { "epoch": 0.35628175749726015, "grad_norm": 0.37766294343524515, "learning_rate": 2.910347606731315e-05, "loss": 0.8642, "step": 894 }, { "epoch": 0.35668028295307364, "grad_norm": 0.3335713482308801, "learning_rate": 2.9080937314337853e-05, "loss": 0.8261, "step": 895 }, { "epoch": 0.3570788084088871, "grad_norm": 0.3586058859524884, "learning_rate": 2.9058384024291064e-05, "loss": 0.8299, "step": 896 }, { "epoch": 0.3574773338647006, "grad_norm": 0.35518778170798426, "learning_rate": 2.9035816233276866e-05, "loss": 0.8664, "step": 897 }, { "epoch": 0.3578758593205141, "grad_norm": 0.3226292379642851, "learning_rate": 2.901323397742253e-05, "loss": 0.8176, "step": 898 }, { "epoch": 0.3582743847763276, "grad_norm": 0.2963818087079733, "learning_rate": 2.8990637292878495e-05, "loss": 0.8379, "step": 899 }, { "epoch": 0.3586729102321411, "grad_norm": 0.330128684962309, "learning_rate": 2.896802621581831e-05, "loss": 0.8069, "step": 900 }, { "epoch": 0.3590714356879546, "grad_norm": 0.30550512523931456, "learning_rate": 2.8945400782438536e-05, "loss": 0.8098, "step": 901 }, { "epoch": 0.35946996114376806, "grad_norm": 0.3225722537828969, "learning_rate": 2.8922761028958735e-05, "loss": 0.8256, "step": 902 }, { "epoch": 0.35986848659958154, "grad_norm": 0.32436626447460576, "learning_rate": 2.89001069916214e-05, "loss": 0.8697, "step": 903 }, { "epoch": 0.360267012055395, "grad_norm": 0.3248090965744356, "learning_rate": 2.8877438706691876e-05, "loss": 0.7905, "step": 904 }, { "epoch": 0.36066553751120856, "grad_norm": 0.3423557906931257, "learning_rate": 2.8854756210458305e-05, "loss": 0.808, "step": 905 }, { "epoch": 0.36106406296702204, "grad_norm": 0.3533066672835484, "learning_rate": 2.8832059539231612e-05, "loss": 0.8158, "step": 906 }, { "epoch": 0.3614625884228355, "grad_norm": 0.3274286434791991, "learning_rate": 2.88093487293454e-05, "loss": 0.7964, "step": 907 }, { "epoch": 0.361861113878649, "grad_norm": 0.3549517407326649, "learning_rate": 2.8786623817155875e-05, "loss": 0.8459, "step": 908 }, { "epoch": 0.3622596393344625, "grad_norm": 0.3179414770046732, "learning_rate": 2.8763884839041876e-05, "loss": 0.8141, "step": 909 }, { "epoch": 0.36265816479027596, "grad_norm": 0.34921190558386694, "learning_rate": 2.87411318314047e-05, "loss": 0.8319, "step": 910 }, { "epoch": 0.36305669024608944, "grad_norm": 0.46547909862633313, "learning_rate": 2.8718364830668153e-05, "loss": 0.8386, "step": 911 }, { "epoch": 0.363455215701903, "grad_norm": 0.3362430896899564, "learning_rate": 2.8695583873278402e-05, "loss": 0.8087, "step": 912 }, { "epoch": 0.36385374115771646, "grad_norm": 0.3421880254638392, "learning_rate": 2.8672788995703985e-05, "loss": 0.8288, "step": 913 }, { "epoch": 0.36425226661352994, "grad_norm": 0.33774819740594564, "learning_rate": 2.864998023443571e-05, "loss": 0.8284, "step": 914 }, { "epoch": 0.3646507920693434, "grad_norm": 0.32177729327477683, "learning_rate": 2.862715762598662e-05, "loss": 0.8086, "step": 915 }, { "epoch": 0.3650493175251569, "grad_norm": 0.31718396437386565, "learning_rate": 2.8604321206891904e-05, "loss": 0.8077, "step": 916 }, { "epoch": 0.3654478429809704, "grad_norm": 0.3078535072758799, "learning_rate": 2.858147101370888e-05, "loss": 0.815, "step": 917 }, { "epoch": 0.3658463684367839, "grad_norm": 0.3251261011534896, "learning_rate": 2.855860708301692e-05, "loss": 0.8154, "step": 918 }, { "epoch": 0.3662448938925974, "grad_norm": 0.32646080328089405, "learning_rate": 2.8535729451417354e-05, "loss": 0.8495, "step": 919 }, { "epoch": 0.3666434193484109, "grad_norm": 0.32013473579432894, "learning_rate": 2.851283815553349e-05, "loss": 0.8257, "step": 920 }, { "epoch": 0.36704194480422436, "grad_norm": 0.3404460262778686, "learning_rate": 2.8489933232010486e-05, "loss": 0.8274, "step": 921 }, { "epoch": 0.36744047026003784, "grad_norm": 0.3179214806128248, "learning_rate": 2.8467014717515303e-05, "loss": 0.8221, "step": 922 }, { "epoch": 0.3678389957158513, "grad_norm": 0.3686956431219607, "learning_rate": 2.8444082648736695e-05, "loss": 0.8577, "step": 923 }, { "epoch": 0.36823752117166486, "grad_norm": 0.3319571070853765, "learning_rate": 2.8421137062385077e-05, "loss": 0.8472, "step": 924 }, { "epoch": 0.36863604662747834, "grad_norm": 0.33391728985772273, "learning_rate": 2.839817799519252e-05, "loss": 0.8407, "step": 925 }, { "epoch": 0.3690345720832918, "grad_norm": 0.36377333064615536, "learning_rate": 2.8375205483912683e-05, "loss": 0.8062, "step": 926 }, { "epoch": 0.3694330975391053, "grad_norm": 0.3192797421529141, "learning_rate": 2.8352219565320734e-05, "loss": 0.8198, "step": 927 }, { "epoch": 0.3698316229949188, "grad_norm": 0.34072810185050395, "learning_rate": 2.8329220276213312e-05, "loss": 0.8553, "step": 928 }, { "epoch": 0.37023014845073227, "grad_norm": 0.3510179405385589, "learning_rate": 2.8306207653408452e-05, "loss": 0.803, "step": 929 }, { "epoch": 0.3706286739065458, "grad_norm": 0.33046352991412514, "learning_rate": 2.8283181733745545e-05, "loss": 0.8196, "step": 930 }, { "epoch": 0.3710271993623593, "grad_norm": 0.3296330314721836, "learning_rate": 2.826014255408525e-05, "loss": 0.8113, "step": 931 }, { "epoch": 0.37142572481817276, "grad_norm": 0.32819051407453925, "learning_rate": 2.823709015130948e-05, "loss": 0.8363, "step": 932 }, { "epoch": 0.37182425027398625, "grad_norm": 0.32244270165621963, "learning_rate": 2.8214024562321288e-05, "loss": 0.8159, "step": 933 }, { "epoch": 0.3722227757297997, "grad_norm": 0.33554287954574435, "learning_rate": 2.8190945824044854e-05, "loss": 0.8275, "step": 934 }, { "epoch": 0.3726213011856132, "grad_norm": 0.31619676372667777, "learning_rate": 2.8167853973425408e-05, "loss": 0.8237, "step": 935 }, { "epoch": 0.37301982664142674, "grad_norm": 0.3145096541701049, "learning_rate": 2.8144749047429155e-05, "loss": 0.8112, "step": 936 }, { "epoch": 0.3734183520972402, "grad_norm": 0.3733084988221381, "learning_rate": 2.812163108304325e-05, "loss": 0.8492, "step": 937 }, { "epoch": 0.3738168775530537, "grad_norm": 0.3271910427372345, "learning_rate": 2.8098500117275708e-05, "loss": 0.8409, "step": 938 }, { "epoch": 0.3742154030088672, "grad_norm": 0.3506373095855538, "learning_rate": 2.8075356187155357e-05, "loss": 0.8255, "step": 939 }, { "epoch": 0.37461392846468067, "grad_norm": 0.3523796388032185, "learning_rate": 2.805219932973179e-05, "loss": 0.8198, "step": 940 }, { "epoch": 0.37501245392049415, "grad_norm": 0.31630826125781786, "learning_rate": 2.8029029582075286e-05, "loss": 0.8279, "step": 941 }, { "epoch": 0.3754109793763077, "grad_norm": 0.31383140189055664, "learning_rate": 2.8005846981276758e-05, "loss": 0.84, "step": 942 }, { "epoch": 0.37580950483212117, "grad_norm": 0.3308152244077927, "learning_rate": 2.79826515644477e-05, "loss": 0.8551, "step": 943 }, { "epoch": 0.37620803028793465, "grad_norm": 0.3183707047927005, "learning_rate": 2.795944336872012e-05, "loss": 0.835, "step": 944 }, { "epoch": 0.37660655574374813, "grad_norm": 0.34065129082815276, "learning_rate": 2.7936222431246478e-05, "loss": 0.8194, "step": 945 }, { "epoch": 0.3770050811995616, "grad_norm": 0.33055758193564483, "learning_rate": 2.791298878919964e-05, "loss": 0.8295, "step": 946 }, { "epoch": 0.3774036066553751, "grad_norm": 0.3178548706287361, "learning_rate": 2.7889742479772793e-05, "loss": 0.8487, "step": 947 }, { "epoch": 0.3778021321111886, "grad_norm": 0.34056866287653254, "learning_rate": 2.7866483540179438e-05, "loss": 0.822, "step": 948 }, { "epoch": 0.3782006575670021, "grad_norm": 0.3530872392015572, "learning_rate": 2.784321200765326e-05, "loss": 0.7945, "step": 949 }, { "epoch": 0.3785991830228156, "grad_norm": 0.34823844388780467, "learning_rate": 2.781992791944811e-05, "loss": 0.8343, "step": 950 }, { "epoch": 0.37899770847862907, "grad_norm": 0.32473433019889203, "learning_rate": 2.779663131283795e-05, "loss": 0.7889, "step": 951 }, { "epoch": 0.37939623393444255, "grad_norm": 0.3440773152101907, "learning_rate": 2.7773322225116774e-05, "loss": 0.8085, "step": 952 }, { "epoch": 0.37979475939025603, "grad_norm": 0.3136356275301238, "learning_rate": 2.7750000693598557e-05, "loss": 0.7984, "step": 953 }, { "epoch": 0.38019328484606957, "grad_norm": 0.36010994273938446, "learning_rate": 2.7726666755617198e-05, "loss": 0.8176, "step": 954 }, { "epoch": 0.38059181030188305, "grad_norm": 0.4042048335792527, "learning_rate": 2.770332044852645e-05, "loss": 0.8298, "step": 955 }, { "epoch": 0.38099033575769653, "grad_norm": 0.33696767739158523, "learning_rate": 2.7679961809699878e-05, "loss": 0.7998, "step": 956 }, { "epoch": 0.38138886121351, "grad_norm": 0.32263411827838845, "learning_rate": 2.765659087653077e-05, "loss": 0.8234, "step": 957 }, { "epoch": 0.3817873866693235, "grad_norm": 0.3199567939883172, "learning_rate": 2.7633207686432113e-05, "loss": 0.8108, "step": 958 }, { "epoch": 0.382185912125137, "grad_norm": 0.33168910588991024, "learning_rate": 2.760981227683651e-05, "loss": 0.8313, "step": 959 }, { "epoch": 0.3825844375809505, "grad_norm": 0.3238687202666879, "learning_rate": 2.758640468519611e-05, "loss": 0.8321, "step": 960 }, { "epoch": 0.382982963036764, "grad_norm": 0.3478685120540082, "learning_rate": 2.7562984948982595e-05, "loss": 0.824, "step": 961 }, { "epoch": 0.38338148849257747, "grad_norm": 0.4127997530905888, "learning_rate": 2.7539553105687063e-05, "loss": 0.8061, "step": 962 }, { "epoch": 0.38378001394839095, "grad_norm": 0.3571852104724218, "learning_rate": 2.7516109192820003e-05, "loss": 0.8401, "step": 963 }, { "epoch": 0.38417853940420443, "grad_norm": 0.33227253978050236, "learning_rate": 2.749265324791122e-05, "loss": 0.8522, "step": 964 }, { "epoch": 0.3845770648600179, "grad_norm": 0.5247271121688866, "learning_rate": 2.7469185308509786e-05, "loss": 0.8134, "step": 965 }, { "epoch": 0.38497559031583145, "grad_norm": 0.3470222523911159, "learning_rate": 2.744570541218397e-05, "loss": 0.7991, "step": 966 }, { "epoch": 0.38537411577164493, "grad_norm": 0.34151142631527753, "learning_rate": 2.7422213596521183e-05, "loss": 0.8467, "step": 967 }, { "epoch": 0.3857726412274584, "grad_norm": 0.519889333298418, "learning_rate": 2.7398709899127927e-05, "loss": 0.8306, "step": 968 }, { "epoch": 0.3861711666832719, "grad_norm": 0.3258609895102337, "learning_rate": 2.7375194357629696e-05, "loss": 0.7873, "step": 969 }, { "epoch": 0.3865696921390854, "grad_norm": 0.4295037852575729, "learning_rate": 2.7351667009670993e-05, "loss": 0.8403, "step": 970 }, { "epoch": 0.38696821759489886, "grad_norm": 0.36998924298526037, "learning_rate": 2.732812789291516e-05, "loss": 0.8075, "step": 971 }, { "epoch": 0.38736674305071234, "grad_norm": 0.32705437276780996, "learning_rate": 2.7304577045044433e-05, "loss": 0.8282, "step": 972 }, { "epoch": 0.3877652685065259, "grad_norm": 0.3340699092845928, "learning_rate": 2.72810145037598e-05, "loss": 0.7963, "step": 973 }, { "epoch": 0.38816379396233935, "grad_norm": 0.3503260696592739, "learning_rate": 2.7257440306780968e-05, "loss": 0.8606, "step": 974 }, { "epoch": 0.38856231941815284, "grad_norm": 0.3459980112053063, "learning_rate": 2.7233854491846314e-05, "loss": 0.7951, "step": 975 }, { "epoch": 0.3889608448739663, "grad_norm": 0.319254119951506, "learning_rate": 2.721025709671281e-05, "loss": 0.8032, "step": 976 }, { "epoch": 0.3893593703297798, "grad_norm": 0.4897236117125459, "learning_rate": 2.7186648159155962e-05, "loss": 0.8315, "step": 977 }, { "epoch": 0.3897578957855933, "grad_norm": 0.3087529107037527, "learning_rate": 2.7163027716969755e-05, "loss": 0.8117, "step": 978 }, { "epoch": 0.3901564212414068, "grad_norm": 0.3275439817021243, "learning_rate": 2.7139395807966588e-05, "loss": 0.8346, "step": 979 }, { "epoch": 0.3905549466972203, "grad_norm": 0.3083375926780146, "learning_rate": 2.7115752469977224e-05, "loss": 0.8136, "step": 980 }, { "epoch": 0.3909534721530338, "grad_norm": 0.3069416211569783, "learning_rate": 2.7092097740850712e-05, "loss": 0.8213, "step": 981 }, { "epoch": 0.39135199760884726, "grad_norm": 0.3138396694972504, "learning_rate": 2.7068431658454355e-05, "loss": 0.8405, "step": 982 }, { "epoch": 0.39175052306466074, "grad_norm": 0.3236054977163557, "learning_rate": 2.7044754260673607e-05, "loss": 0.8085, "step": 983 }, { "epoch": 0.3921490485204742, "grad_norm": 0.31483246013918365, "learning_rate": 2.702106558541205e-05, "loss": 0.8244, "step": 984 }, { "epoch": 0.39254757397628776, "grad_norm": 0.3541307522351268, "learning_rate": 2.699736567059132e-05, "loss": 0.8002, "step": 985 }, { "epoch": 0.39294609943210124, "grad_norm": 0.32364536612849215, "learning_rate": 2.6973654554151028e-05, "loss": 0.8198, "step": 986 }, { "epoch": 0.3933446248879147, "grad_norm": 0.33398363230386113, "learning_rate": 2.694993227404875e-05, "loss": 0.8393, "step": 987 }, { "epoch": 0.3937431503437282, "grad_norm": 0.349530991319565, "learning_rate": 2.69261988682599e-05, "loss": 0.821, "step": 988 }, { "epoch": 0.3941416757995417, "grad_norm": 0.3679139832318692, "learning_rate": 2.690245437477772e-05, "loss": 0.815, "step": 989 }, { "epoch": 0.39454020125535516, "grad_norm": 0.31671482584430505, "learning_rate": 2.6878698831613202e-05, "loss": 0.8636, "step": 990 }, { "epoch": 0.3949387267111687, "grad_norm": 0.3452241320073205, "learning_rate": 2.6854932276795026e-05, "loss": 0.8111, "step": 991 }, { "epoch": 0.3953372521669822, "grad_norm": 0.3018394208024079, "learning_rate": 2.6831154748369485e-05, "loss": 0.8273, "step": 992 }, { "epoch": 0.39573577762279566, "grad_norm": 0.322632592726802, "learning_rate": 2.6807366284400457e-05, "loss": 0.8038, "step": 993 }, { "epoch": 0.39613430307860914, "grad_norm": 0.34241476868414766, "learning_rate": 2.6783566922969318e-05, "loss": 0.8158, "step": 994 }, { "epoch": 0.3965328285344226, "grad_norm": 0.35584217533454204, "learning_rate": 2.675975670217489e-05, "loss": 0.83, "step": 995 }, { "epoch": 0.3969313539902361, "grad_norm": 0.29169575061351766, "learning_rate": 2.673593566013338e-05, "loss": 0.8124, "step": 996 }, { "epoch": 0.39732987944604964, "grad_norm": 0.3705964533467081, "learning_rate": 2.671210383497832e-05, "loss": 0.8304, "step": 997 }, { "epoch": 0.3977284049018631, "grad_norm": 0.33331825287941125, "learning_rate": 2.66882612648605e-05, "loss": 0.8232, "step": 998 }, { "epoch": 0.3981269303576766, "grad_norm": 0.3379785793208752, "learning_rate": 2.666440798794791e-05, "loss": 0.8113, "step": 999 }, { "epoch": 0.3985254558134901, "grad_norm": 0.47824925692484593, "learning_rate": 2.6640544042425685e-05, "loss": 0.8411, "step": 1000 }, { "epoch": 0.39892398126930356, "grad_norm": 0.33431552475555065, "learning_rate": 2.6616669466496037e-05, "loss": 0.8468, "step": 1001 }, { "epoch": 0.39932250672511704, "grad_norm": 0.34137387833760563, "learning_rate": 2.6592784298378188e-05, "loss": 0.8418, "step": 1002 }, { "epoch": 0.3997210321809306, "grad_norm": 0.39365755246331835, "learning_rate": 2.656888857630833e-05, "loss": 0.8224, "step": 1003 }, { "epoch": 0.40011955763674406, "grad_norm": 0.49873669187777425, "learning_rate": 2.654498233853954e-05, "loss": 0.808, "step": 1004 }, { "epoch": 0.40051808309255754, "grad_norm": 0.3248564941543554, "learning_rate": 2.652106562334173e-05, "loss": 0.8139, "step": 1005 }, { "epoch": 0.400916608548371, "grad_norm": 0.37674381602697304, "learning_rate": 2.649713846900159e-05, "loss": 0.8295, "step": 1006 }, { "epoch": 0.4013151340041845, "grad_norm": 0.39334509517210275, "learning_rate": 2.6473200913822514e-05, "loss": 0.8131, "step": 1007 }, { "epoch": 0.401713659459998, "grad_norm": 0.3224088785864611, "learning_rate": 2.644925299612455e-05, "loss": 0.7975, "step": 1008 }, { "epoch": 0.4021121849158115, "grad_norm": 0.4490995597319525, "learning_rate": 2.642529475424433e-05, "loss": 0.8337, "step": 1009 }, { "epoch": 0.402510710371625, "grad_norm": 0.34775476784856935, "learning_rate": 2.6401326226535037e-05, "loss": 0.81, "step": 1010 }, { "epoch": 0.4029092358274385, "grad_norm": 0.3424684175535075, "learning_rate": 2.6377347451366278e-05, "loss": 0.7884, "step": 1011 }, { "epoch": 0.40330776128325196, "grad_norm": 0.32706343083018596, "learning_rate": 2.6353358467124094e-05, "loss": 0.8105, "step": 1012 }, { "epoch": 0.40370628673906545, "grad_norm": 0.3531111527591312, "learning_rate": 2.632935931221087e-05, "loss": 0.8524, "step": 1013 }, { "epoch": 0.4041048121948789, "grad_norm": 0.3134079131717474, "learning_rate": 2.6305350025045257e-05, "loss": 0.8258, "step": 1014 }, { "epoch": 0.40450333765069246, "grad_norm": 0.3041258254708691, "learning_rate": 2.6281330644062126e-05, "loss": 0.8363, "step": 1015 }, { "epoch": 0.40490186310650594, "grad_norm": 0.34271831802902314, "learning_rate": 2.6257301207712536e-05, "loss": 0.8045, "step": 1016 }, { "epoch": 0.4053003885623194, "grad_norm": 0.31087347936442256, "learning_rate": 2.6233261754463605e-05, "loss": 0.8331, "step": 1017 }, { "epoch": 0.4056989140181329, "grad_norm": 0.32439571159756025, "learning_rate": 2.62092123227985e-05, "loss": 0.839, "step": 1018 }, { "epoch": 0.4060974394739464, "grad_norm": 0.3077347841509726, "learning_rate": 2.6185152951216373e-05, "loss": 0.8078, "step": 1019 }, { "epoch": 0.40649596492975987, "grad_norm": 0.32342264487059186, "learning_rate": 2.6161083678232277e-05, "loss": 0.8101, "step": 1020 }, { "epoch": 0.4068944903855734, "grad_norm": 0.4192377215503443, "learning_rate": 2.6137004542377122e-05, "loss": 0.8333, "step": 1021 }, { "epoch": 0.4072930158413869, "grad_norm": 0.3193163344884458, "learning_rate": 2.611291558219759e-05, "loss": 0.8177, "step": 1022 }, { "epoch": 0.40769154129720037, "grad_norm": 0.36366689035128674, "learning_rate": 2.608881683625612e-05, "loss": 0.8339, "step": 1023 }, { "epoch": 0.40809006675301385, "grad_norm": 0.2956336562200817, "learning_rate": 2.6064708343130787e-05, "loss": 0.8344, "step": 1024 }, { "epoch": 0.40848859220882733, "grad_norm": 0.35391087494148843, "learning_rate": 2.604059014141529e-05, "loss": 0.8243, "step": 1025 }, { "epoch": 0.4088871176646408, "grad_norm": 0.3753489106825966, "learning_rate": 2.601646226971885e-05, "loss": 0.816, "step": 1026 }, { "epoch": 0.40928564312045435, "grad_norm": 0.33358896662610243, "learning_rate": 2.5992324766666194e-05, "loss": 0.8168, "step": 1027 }, { "epoch": 0.4096841685762678, "grad_norm": 0.3494626801983563, "learning_rate": 2.5968177670897447e-05, "loss": 0.8158, "step": 1028 }, { "epoch": 0.4100826940320813, "grad_norm": 0.3311602416729186, "learning_rate": 2.5944021021068086e-05, "loss": 0.8289, "step": 1029 }, { "epoch": 0.4104812194878948, "grad_norm": 0.32920620411123275, "learning_rate": 2.591985485584891e-05, "loss": 0.8462, "step": 1030 }, { "epoch": 0.41087974494370827, "grad_norm": 0.33365179190960775, "learning_rate": 2.589567921392593e-05, "loss": 0.8316, "step": 1031 }, { "epoch": 0.41127827039952175, "grad_norm": 0.2840477218269186, "learning_rate": 2.587149413400032e-05, "loss": 0.8243, "step": 1032 }, { "epoch": 0.4116767958553353, "grad_norm": 0.3094870981520638, "learning_rate": 2.5847299654788384e-05, "loss": 0.8302, "step": 1033 }, { "epoch": 0.41207532131114877, "grad_norm": 0.33160788932455293, "learning_rate": 2.5823095815021458e-05, "loss": 0.8047, "step": 1034 }, { "epoch": 0.41247384676696225, "grad_norm": 0.3296215696895382, "learning_rate": 2.579888265344586e-05, "loss": 0.8408, "step": 1035 }, { "epoch": 0.41287237222277573, "grad_norm": 0.31027823213043904, "learning_rate": 2.5774660208822854e-05, "loss": 0.797, "step": 1036 }, { "epoch": 0.4132708976785892, "grad_norm": 0.3233755627458931, "learning_rate": 2.5750428519928542e-05, "loss": 0.8437, "step": 1037 }, { "epoch": 0.4136694231344027, "grad_norm": 0.3618514564925971, "learning_rate": 2.572618762555382e-05, "loss": 0.8202, "step": 1038 }, { "epoch": 0.4140679485902162, "grad_norm": 0.33907339886292404, "learning_rate": 2.5701937564504345e-05, "loss": 0.8199, "step": 1039 }, { "epoch": 0.4144664740460297, "grad_norm": 0.3068383167662696, "learning_rate": 2.5677678375600436e-05, "loss": 0.8301, "step": 1040 }, { "epoch": 0.4148649995018432, "grad_norm": 0.3790622200712186, "learning_rate": 2.565341009767701e-05, "loss": 0.8171, "step": 1041 }, { "epoch": 0.41526352495765667, "grad_norm": 0.3433456715007725, "learning_rate": 2.562913276958355e-05, "loss": 0.8431, "step": 1042 }, { "epoch": 0.41566205041347015, "grad_norm": 0.31629971388025424, "learning_rate": 2.5604846430184034e-05, "loss": 0.8188, "step": 1043 }, { "epoch": 0.41606057586928363, "grad_norm": 0.36903895294398353, "learning_rate": 2.5580551118356842e-05, "loss": 0.7884, "step": 1044 }, { "epoch": 0.4164591013250971, "grad_norm": 0.3504976082604236, "learning_rate": 2.5556246872994744e-05, "loss": 0.8139, "step": 1045 }, { "epoch": 0.41685762678091065, "grad_norm": 0.32035221174765094, "learning_rate": 2.5531933733004785e-05, "loss": 0.8017, "step": 1046 }, { "epoch": 0.41725615223672413, "grad_norm": 0.3394515589088212, "learning_rate": 2.550761173730827e-05, "loss": 0.8029, "step": 1047 }, { "epoch": 0.4176546776925376, "grad_norm": 0.35804587588727005, "learning_rate": 2.548328092484067e-05, "loss": 0.8015, "step": 1048 }, { "epoch": 0.4180532031483511, "grad_norm": 0.33858523464707274, "learning_rate": 2.5458941334551566e-05, "loss": 0.801, "step": 1049 }, { "epoch": 0.4184517286041646, "grad_norm": 0.3288133650068113, "learning_rate": 2.5434593005404605e-05, "loss": 0.8036, "step": 1050 }, { "epoch": 0.41885025405997806, "grad_norm": 0.3424539726833037, "learning_rate": 2.5410235976377418e-05, "loss": 0.8028, "step": 1051 }, { "epoch": 0.4192487795157916, "grad_norm": 0.3023013418013977, "learning_rate": 2.5385870286461547e-05, "loss": 0.8513, "step": 1052 }, { "epoch": 0.4196473049716051, "grad_norm": 0.34500936623066886, "learning_rate": 2.536149597466243e-05, "loss": 0.8254, "step": 1053 }, { "epoch": 0.42004583042741855, "grad_norm": 0.31922631055010225, "learning_rate": 2.5337113079999278e-05, "loss": 0.8363, "step": 1054 }, { "epoch": 0.42044435588323203, "grad_norm": 1.3109348539871832, "learning_rate": 2.5312721641505054e-05, "loss": 0.8507, "step": 1055 }, { "epoch": 0.4208428813390455, "grad_norm": 0.32480191303613704, "learning_rate": 2.5288321698226393e-05, "loss": 0.8271, "step": 1056 }, { "epoch": 0.421241406794859, "grad_norm": 0.37122122754776027, "learning_rate": 2.5263913289223567e-05, "loss": 0.8461, "step": 1057 }, { "epoch": 0.42163993225067253, "grad_norm": 0.3268123530148818, "learning_rate": 2.523949645357036e-05, "loss": 0.8081, "step": 1058 }, { "epoch": 0.422038457706486, "grad_norm": 0.3751401095220027, "learning_rate": 2.5215071230354085e-05, "loss": 0.7995, "step": 1059 }, { "epoch": 0.4224369831622995, "grad_norm": 0.3784425259279124, "learning_rate": 2.519063765867546e-05, "loss": 0.8189, "step": 1060 }, { "epoch": 0.422835508618113, "grad_norm": 0.3433963567838051, "learning_rate": 2.5166195777648565e-05, "loss": 0.8306, "step": 1061 }, { "epoch": 0.42323403407392646, "grad_norm": 0.3566697956385714, "learning_rate": 2.5141745626400804e-05, "loss": 0.8073, "step": 1062 }, { "epoch": 0.42363255952973994, "grad_norm": 0.3406773772854413, "learning_rate": 2.511728724407279e-05, "loss": 0.8126, "step": 1063 }, { "epoch": 0.4240310849855535, "grad_norm": 0.3227569499796658, "learning_rate": 2.509282066981834e-05, "loss": 0.8547, "step": 1064 }, { "epoch": 0.42442961044136696, "grad_norm": 0.43322555481131175, "learning_rate": 2.5068345942804372e-05, "loss": 0.8056, "step": 1065 }, { "epoch": 0.42482813589718044, "grad_norm": 0.3401664677873486, "learning_rate": 2.5043863102210854e-05, "loss": 0.8301, "step": 1066 }, { "epoch": 0.4252266613529939, "grad_norm": 0.3308251533254951, "learning_rate": 2.5019372187230734e-05, "loss": 0.8109, "step": 1067 }, { "epoch": 0.4256251868088074, "grad_norm": 0.3369938034523319, "learning_rate": 2.4994873237069922e-05, "loss": 0.8198, "step": 1068 }, { "epoch": 0.4260237122646209, "grad_norm": 0.3280216989154936, "learning_rate": 2.4970366290947145e-05, "loss": 0.8119, "step": 1069 }, { "epoch": 0.4264222377204344, "grad_norm": 0.335124086686642, "learning_rate": 2.4945851388093953e-05, "loss": 0.8111, "step": 1070 }, { "epoch": 0.4268207631762479, "grad_norm": 0.32998466833884404, "learning_rate": 2.4921328567754643e-05, "loss": 0.7979, "step": 1071 }, { "epoch": 0.4272192886320614, "grad_norm": 0.3245876352671091, "learning_rate": 2.489679786918617e-05, "loss": 0.8341, "step": 1072 }, { "epoch": 0.42761781408787486, "grad_norm": 0.339066366216921, "learning_rate": 2.4872259331658092e-05, "loss": 0.8412, "step": 1073 }, { "epoch": 0.42801633954368834, "grad_norm": 0.3632142337136183, "learning_rate": 2.4847712994452552e-05, "loss": 0.8287, "step": 1074 }, { "epoch": 0.4284148649995018, "grad_norm": 0.31666688457965547, "learning_rate": 2.4823158896864138e-05, "loss": 0.8108, "step": 1075 }, { "epoch": 0.42881339045531536, "grad_norm": 0.33156115315753226, "learning_rate": 2.479859707819989e-05, "loss": 0.8115, "step": 1076 }, { "epoch": 0.42921191591112884, "grad_norm": 0.3411054033949336, "learning_rate": 2.47740275777792e-05, "loss": 0.8132, "step": 1077 }, { "epoch": 0.4296104413669423, "grad_norm": 0.3402952332409344, "learning_rate": 2.4749450434933743e-05, "loss": 0.8076, "step": 1078 }, { "epoch": 0.4300089668227558, "grad_norm": 0.3223187993665448, "learning_rate": 2.472486568900745e-05, "loss": 0.8426, "step": 1079 }, { "epoch": 0.4304074922785693, "grad_norm": 0.31498257951573805, "learning_rate": 2.470027337935641e-05, "loss": 0.8166, "step": 1080 }, { "epoch": 0.43080601773438276, "grad_norm": 0.31243598520804755, "learning_rate": 2.4675673545348825e-05, "loss": 0.8295, "step": 1081 }, { "epoch": 0.4312045431901963, "grad_norm": 0.3538419858260921, "learning_rate": 2.4651066226364943e-05, "loss": 0.8293, "step": 1082 }, { "epoch": 0.4316030686460098, "grad_norm": 0.31794472665083506, "learning_rate": 2.462645146179698e-05, "loss": 0.8099, "step": 1083 }, { "epoch": 0.43200159410182326, "grad_norm": 0.31784933887583533, "learning_rate": 2.4601829291049098e-05, "loss": 0.7962, "step": 1084 }, { "epoch": 0.43240011955763674, "grad_norm": 0.42026330060809836, "learning_rate": 2.45771997535373e-05, "loss": 0.816, "step": 1085 }, { "epoch": 0.4327986450134502, "grad_norm": 0.33452336084693307, "learning_rate": 2.4552562888689376e-05, "loss": 0.8075, "step": 1086 }, { "epoch": 0.4331971704692637, "grad_norm": 0.322413780248328, "learning_rate": 2.4527918735944853e-05, "loss": 0.7956, "step": 1087 }, { "epoch": 0.43359569592507724, "grad_norm": 0.32866939422553315, "learning_rate": 2.4503267334754925e-05, "loss": 0.8368, "step": 1088 }, { "epoch": 0.4339942213808907, "grad_norm": 0.31934456546936785, "learning_rate": 2.447860872458239e-05, "loss": 0.8438, "step": 1089 }, { "epoch": 0.4343927468367042, "grad_norm": 0.32490029875471044, "learning_rate": 2.4453942944901575e-05, "loss": 0.8056, "step": 1090 }, { "epoch": 0.4347912722925177, "grad_norm": 0.32929458543358014, "learning_rate": 2.4429270035198313e-05, "loss": 0.8037, "step": 1091 }, { "epoch": 0.43518979774833116, "grad_norm": 0.32506473231877164, "learning_rate": 2.4404590034969822e-05, "loss": 0.8113, "step": 1092 }, { "epoch": 0.43558832320414465, "grad_norm": 0.29212134247678295, "learning_rate": 2.437990298372467e-05, "loss": 0.8005, "step": 1093 }, { "epoch": 0.4359868486599582, "grad_norm": 0.3455754520750264, "learning_rate": 2.4355208920982744e-05, "loss": 0.7994, "step": 1094 }, { "epoch": 0.43638537411577166, "grad_norm": 0.30065376764152013, "learning_rate": 2.4330507886275122e-05, "loss": 0.8164, "step": 1095 }, { "epoch": 0.43678389957158514, "grad_norm": 0.32131061866768784, "learning_rate": 2.4305799919144055e-05, "loss": 0.8316, "step": 1096 }, { "epoch": 0.4371824250273986, "grad_norm": 0.3311309667775356, "learning_rate": 2.4281085059142892e-05, "loss": 0.8194, "step": 1097 }, { "epoch": 0.4375809504832121, "grad_norm": 0.32898839768451466, "learning_rate": 2.4256363345836026e-05, "loss": 0.8321, "step": 1098 }, { "epoch": 0.4379794759390256, "grad_norm": 0.3065918498699849, "learning_rate": 2.4231634818798798e-05, "loss": 0.7826, "step": 1099 }, { "epoch": 0.4383780013948391, "grad_norm": 0.35465547671253245, "learning_rate": 2.4206899517617485e-05, "loss": 0.8267, "step": 1100 }, { "epoch": 0.4387765268506526, "grad_norm": 0.3301349903148197, "learning_rate": 2.4182157481889183e-05, "loss": 0.8022, "step": 1101 }, { "epoch": 0.4391750523064661, "grad_norm": 0.316437289769763, "learning_rate": 2.415740875122178e-05, "loss": 0.8036, "step": 1102 }, { "epoch": 0.43957357776227957, "grad_norm": 0.332243171121802, "learning_rate": 2.413265336523389e-05, "loss": 0.8352, "step": 1103 }, { "epoch": 0.43997210321809305, "grad_norm": 0.5376924415941126, "learning_rate": 2.4107891363554753e-05, "loss": 0.8306, "step": 1104 }, { "epoch": 0.44037062867390653, "grad_norm": 0.303147057063706, "learning_rate": 2.4083122785824236e-05, "loss": 0.7916, "step": 1105 }, { "epoch": 0.44076915412972, "grad_norm": 0.34716257230796316, "learning_rate": 2.405834767169271e-05, "loss": 0.7974, "step": 1106 }, { "epoch": 0.44116767958553355, "grad_norm": 0.3205567864972624, "learning_rate": 2.403356606082101e-05, "loss": 0.8002, "step": 1107 }, { "epoch": 0.441566205041347, "grad_norm": 0.29598982127864676, "learning_rate": 2.400877799288039e-05, "loss": 0.8077, "step": 1108 }, { "epoch": 0.4419647304971605, "grad_norm": 0.3707790401289273, "learning_rate": 2.398398350755242e-05, "loss": 0.8119, "step": 1109 }, { "epoch": 0.442363255952974, "grad_norm": 0.35724626182329483, "learning_rate": 2.3959182644528945e-05, "loss": 0.8117, "step": 1110 }, { "epoch": 0.44276178140878747, "grad_norm": 0.3194532912667194, "learning_rate": 2.3934375443512025e-05, "loss": 0.8052, "step": 1111 }, { "epoch": 0.44316030686460095, "grad_norm": 0.3897881316911469, "learning_rate": 2.3909561944213876e-05, "loss": 0.8188, "step": 1112 }, { "epoch": 0.4435588323204145, "grad_norm": 0.31474565450210384, "learning_rate": 2.3884742186356783e-05, "loss": 0.8301, "step": 1113 }, { "epoch": 0.44395735777622797, "grad_norm": 0.34893912043486475, "learning_rate": 2.385991620967305e-05, "loss": 0.7822, "step": 1114 }, { "epoch": 0.44435588323204145, "grad_norm": 0.34444018169025264, "learning_rate": 2.383508405390494e-05, "loss": 0.8036, "step": 1115 }, { "epoch": 0.44475440868785493, "grad_norm": 0.3209220544042362, "learning_rate": 2.3810245758804614e-05, "loss": 0.7959, "step": 1116 }, { "epoch": 0.4451529341436684, "grad_norm": 0.3597044151663452, "learning_rate": 2.378540136413405e-05, "loss": 0.8029, "step": 1117 }, { "epoch": 0.4455514595994819, "grad_norm": 0.5678063532761977, "learning_rate": 2.3760550909664987e-05, "loss": 0.7966, "step": 1118 }, { "epoch": 0.44594998505529543, "grad_norm": 0.3399480220411935, "learning_rate": 2.373569443517888e-05, "loss": 0.8075, "step": 1119 }, { "epoch": 0.4463485105111089, "grad_norm": 0.30860916880522943, "learning_rate": 2.3710831980466825e-05, "loss": 0.816, "step": 1120 }, { "epoch": 0.4467470359669224, "grad_norm": 0.30451406346046384, "learning_rate": 2.368596358532947e-05, "loss": 0.7821, "step": 1121 }, { "epoch": 0.44714556142273587, "grad_norm": 0.3274342257348003, "learning_rate": 2.3661089289576973e-05, "loss": 0.8099, "step": 1122 }, { "epoch": 0.44754408687854935, "grad_norm": 0.2990103230908009, "learning_rate": 2.3636209133028957e-05, "loss": 0.8438, "step": 1123 }, { "epoch": 0.44794261233436283, "grad_norm": 0.33085965104050497, "learning_rate": 2.361132315551442e-05, "loss": 0.8148, "step": 1124 }, { "epoch": 0.44834113779017637, "grad_norm": 0.3235378935161311, "learning_rate": 2.3586431396871677e-05, "loss": 0.816, "step": 1125 }, { "epoch": 0.44873966324598985, "grad_norm": 0.30982112537132234, "learning_rate": 2.3561533896948296e-05, "loss": 0.8205, "step": 1126 }, { "epoch": 0.44913818870180333, "grad_norm": 0.3148765787287355, "learning_rate": 2.3536630695601027e-05, "loss": 0.7902, "step": 1127 }, { "epoch": 0.4495367141576168, "grad_norm": 0.3794802774217404, "learning_rate": 2.3511721832695767e-05, "loss": 0.8269, "step": 1128 }, { "epoch": 0.4499352396134303, "grad_norm": 0.3284627503131426, "learning_rate": 2.3486807348107464e-05, "loss": 0.8597, "step": 1129 }, { "epoch": 0.4503337650692438, "grad_norm": 0.31901034421618163, "learning_rate": 2.3461887281720066e-05, "loss": 0.8024, "step": 1130 }, { "epoch": 0.4507322905250573, "grad_norm": 0.35755058361337694, "learning_rate": 2.3436961673426456e-05, "loss": 0.8201, "step": 1131 }, { "epoch": 0.4511308159808708, "grad_norm": 0.37055788579790766, "learning_rate": 2.3412030563128402e-05, "loss": 0.8043, "step": 1132 }, { "epoch": 0.4515293414366843, "grad_norm": 0.29135675861869104, "learning_rate": 2.338709399073645e-05, "loss": 0.8151, "step": 1133 }, { "epoch": 0.45192786689249775, "grad_norm": 0.3342416376182507, "learning_rate": 2.336215199616992e-05, "loss": 0.8368, "step": 1134 }, { "epoch": 0.45232639234831123, "grad_norm": 0.33393406000623976, "learning_rate": 2.33372046193568e-05, "loss": 0.8156, "step": 1135 }, { "epoch": 0.4527249178041247, "grad_norm": 0.2962123245077335, "learning_rate": 2.3312251900233687e-05, "loss": 0.8133, "step": 1136 }, { "epoch": 0.45312344325993825, "grad_norm": 0.3252453832873177, "learning_rate": 2.3287293878745746e-05, "loss": 0.8104, "step": 1137 }, { "epoch": 0.45352196871575173, "grad_norm": 0.31101543033789, "learning_rate": 2.3262330594846615e-05, "loss": 0.8116, "step": 1138 }, { "epoch": 0.4539204941715652, "grad_norm": 0.3142215269516538, "learning_rate": 2.3237362088498366e-05, "loss": 0.8312, "step": 1139 }, { "epoch": 0.4543190196273787, "grad_norm": 0.3156466217062423, "learning_rate": 2.3212388399671434e-05, "loss": 0.8026, "step": 1140 }, { "epoch": 0.4547175450831922, "grad_norm": 0.29130193805422705, "learning_rate": 2.318740956834453e-05, "loss": 0.8208, "step": 1141 }, { "epoch": 0.45511607053900566, "grad_norm": 0.31609767343436057, "learning_rate": 2.3162425634504624e-05, "loss": 0.8048, "step": 1142 }, { "epoch": 0.4555145959948192, "grad_norm": 0.30627780545918254, "learning_rate": 2.3137436638146838e-05, "loss": 0.8256, "step": 1143 }, { "epoch": 0.4559131214506327, "grad_norm": 0.3942343869320896, "learning_rate": 2.3112442619274408e-05, "loss": 0.8231, "step": 1144 }, { "epoch": 0.45631164690644616, "grad_norm": 0.30922816387497437, "learning_rate": 2.3087443617898585e-05, "loss": 0.8128, "step": 1145 }, { "epoch": 0.45671017236225964, "grad_norm": 0.31257709643441933, "learning_rate": 2.3062439674038643e-05, "loss": 0.7816, "step": 1146 }, { "epoch": 0.4571086978180731, "grad_norm": 0.3125099111968418, "learning_rate": 2.3037430827721724e-05, "loss": 0.8511, "step": 1147 }, { "epoch": 0.4575072232738866, "grad_norm": 0.3259270287494568, "learning_rate": 2.3012417118982833e-05, "loss": 0.8078, "step": 1148 }, { "epoch": 0.45790574872970013, "grad_norm": 0.4841424847659405, "learning_rate": 2.298739858786477e-05, "loss": 0.846, "step": 1149 }, { "epoch": 0.4583042741855136, "grad_norm": 0.30651971893302865, "learning_rate": 2.2962375274418042e-05, "loss": 0.7836, "step": 1150 }, { "epoch": 0.4587027996413271, "grad_norm": 0.29130109838002205, "learning_rate": 2.2937347218700814e-05, "loss": 0.8251, "step": 1151 }, { "epoch": 0.4591013250971406, "grad_norm": 0.29216772346283687, "learning_rate": 2.2912314460778838e-05, "loss": 0.7934, "step": 1152 }, { "epoch": 0.45949985055295406, "grad_norm": 0.28659925320048857, "learning_rate": 2.2887277040725416e-05, "loss": 0.8132, "step": 1153 }, { "epoch": 0.45989837600876754, "grad_norm": 0.2821978280610863, "learning_rate": 2.2862234998621276e-05, "loss": 0.8018, "step": 1154 }, { "epoch": 0.4602969014645811, "grad_norm": 0.3022683438134659, "learning_rate": 2.2837188374554584e-05, "loss": 0.8011, "step": 1155 }, { "epoch": 0.46069542692039456, "grad_norm": 0.29620670062698495, "learning_rate": 2.281213720862081e-05, "loss": 0.7884, "step": 1156 }, { "epoch": 0.46109395237620804, "grad_norm": 0.2804223684367047, "learning_rate": 2.2787081540922716e-05, "loss": 0.8016, "step": 1157 }, { "epoch": 0.4614924778320215, "grad_norm": 0.30149704387252646, "learning_rate": 2.2762021411570254e-05, "loss": 0.8044, "step": 1158 }, { "epoch": 0.461891003287835, "grad_norm": 0.28566950350769055, "learning_rate": 2.273695686068053e-05, "loss": 0.8113, "step": 1159 }, { "epoch": 0.4622895287436485, "grad_norm": 0.27932263683794883, "learning_rate": 2.2711887928377725e-05, "loss": 0.8178, "step": 1160 }, { "epoch": 0.462688054199462, "grad_norm": 0.3504836230780002, "learning_rate": 2.2686814654793036e-05, "loss": 0.8276, "step": 1161 }, { "epoch": 0.4630865796552755, "grad_norm": 0.31710148422205037, "learning_rate": 2.26617370800646e-05, "loss": 0.8075, "step": 1162 }, { "epoch": 0.463485105111089, "grad_norm": 0.288322551014853, "learning_rate": 2.2636655244337455e-05, "loss": 0.8099, "step": 1163 }, { "epoch": 0.46388363056690246, "grad_norm": 0.30696335215944015, "learning_rate": 2.2611569187763448e-05, "loss": 0.8167, "step": 1164 }, { "epoch": 0.46428215602271594, "grad_norm": 0.2740251270995111, "learning_rate": 2.258647895050118e-05, "loss": 0.8122, "step": 1165 }, { "epoch": 0.4646806814785294, "grad_norm": 0.30100618811204716, "learning_rate": 2.2561384572715957e-05, "loss": 0.8124, "step": 1166 }, { "epoch": 0.4650792069343429, "grad_norm": 0.28921422085766796, "learning_rate": 2.2536286094579717e-05, "loss": 0.8344, "step": 1167 }, { "epoch": 0.46547773239015644, "grad_norm": 0.30173959947735146, "learning_rate": 2.2511183556270937e-05, "loss": 0.8326, "step": 1168 }, { "epoch": 0.4658762578459699, "grad_norm": 0.5060784189623851, "learning_rate": 2.2486076997974617e-05, "loss": 0.7857, "step": 1169 }, { "epoch": 0.4662747833017834, "grad_norm": 0.29228478601288754, "learning_rate": 2.2460966459882184e-05, "loss": 0.7995, "step": 1170 }, { "epoch": 0.4666733087575969, "grad_norm": 0.31868507689912057, "learning_rate": 2.2435851982191426e-05, "loss": 0.8323, "step": 1171 }, { "epoch": 0.46707183421341036, "grad_norm": 0.27865315868245927, "learning_rate": 2.2410733605106462e-05, "loss": 0.7983, "step": 1172 }, { "epoch": 0.46747035966922384, "grad_norm": 0.29759002153633596, "learning_rate": 2.238561136883764e-05, "loss": 0.8044, "step": 1173 }, { "epoch": 0.4678688851250374, "grad_norm": 0.2846486337810441, "learning_rate": 2.236048531360147e-05, "loss": 0.8111, "step": 1174 }, { "epoch": 0.46826741058085086, "grad_norm": 0.3118599392906745, "learning_rate": 2.2335355479620605e-05, "loss": 0.802, "step": 1175 }, { "epoch": 0.46866593603666434, "grad_norm": 0.30270097977856236, "learning_rate": 2.231022190712373e-05, "loss": 0.802, "step": 1176 }, { "epoch": 0.4690644614924778, "grad_norm": 0.2817261828834847, "learning_rate": 2.228508463634551e-05, "loss": 0.8007, "step": 1177 }, { "epoch": 0.4694629869482913, "grad_norm": 0.3274731513059302, "learning_rate": 2.225994370752655e-05, "loss": 0.8138, "step": 1178 }, { "epoch": 0.4698615124041048, "grad_norm": 0.2968053602546118, "learning_rate": 2.2234799160913285e-05, "loss": 0.8239, "step": 1179 }, { "epoch": 0.4702600378599183, "grad_norm": 0.9004493930737405, "learning_rate": 2.2209651036757965e-05, "loss": 0.8121, "step": 1180 }, { "epoch": 0.4706585633157318, "grad_norm": 0.29343035187513045, "learning_rate": 2.218449937531856e-05, "loss": 0.8062, "step": 1181 }, { "epoch": 0.4710570887715453, "grad_norm": 0.3251626790620503, "learning_rate": 2.2159344216858693e-05, "loss": 0.8171, "step": 1182 }, { "epoch": 0.47145561422735877, "grad_norm": 0.3008660196180082, "learning_rate": 2.2134185601647595e-05, "loss": 0.8233, "step": 1183 }, { "epoch": 0.47185413968317225, "grad_norm": 0.31587152291948645, "learning_rate": 2.2109023569960028e-05, "loss": 0.7893, "step": 1184 }, { "epoch": 0.4722526651389857, "grad_norm": 0.3109368684781642, "learning_rate": 2.208385816207622e-05, "loss": 0.8351, "step": 1185 }, { "epoch": 0.47265119059479926, "grad_norm": 0.3585332576145692, "learning_rate": 2.2058689418281806e-05, "loss": 0.8235, "step": 1186 }, { "epoch": 0.47304971605061275, "grad_norm": 0.36347361575702536, "learning_rate": 2.2033517378867773e-05, "loss": 0.8333, "step": 1187 }, { "epoch": 0.4734482415064262, "grad_norm": 0.3104981737491085, "learning_rate": 2.2008342084130357e-05, "loss": 0.7985, "step": 1188 }, { "epoch": 0.4738467669622397, "grad_norm": 0.29070707839217663, "learning_rate": 2.1983163574371038e-05, "loss": 0.8135, "step": 1189 }, { "epoch": 0.4742452924180532, "grad_norm": 0.3019633554231252, "learning_rate": 2.1957981889896413e-05, "loss": 0.8042, "step": 1190 }, { "epoch": 0.47464381787386667, "grad_norm": 0.28671960218113185, "learning_rate": 2.1932797071018176e-05, "loss": 0.7833, "step": 1191 }, { "epoch": 0.4750423433296802, "grad_norm": 0.30296654651092136, "learning_rate": 2.1907609158053043e-05, "loss": 0.802, "step": 1192 }, { "epoch": 0.4754408687854937, "grad_norm": 0.30792479960608926, "learning_rate": 2.1882418191322667e-05, "loss": 0.7874, "step": 1193 }, { "epoch": 0.47583939424130717, "grad_norm": 0.39407347199239423, "learning_rate": 2.18572242111536e-05, "loss": 0.8171, "step": 1194 }, { "epoch": 0.47623791969712065, "grad_norm": 0.2981154461238015, "learning_rate": 2.183202725787723e-05, "loss": 0.8202, "step": 1195 }, { "epoch": 0.47663644515293413, "grad_norm": 0.2883120319508124, "learning_rate": 2.1806827371829686e-05, "loss": 0.8354, "step": 1196 }, { "epoch": 0.4770349706087476, "grad_norm": 0.29569950551843616, "learning_rate": 2.1781624593351788e-05, "loss": 0.8034, "step": 1197 }, { "epoch": 0.47743349606456115, "grad_norm": 0.2942079747064485, "learning_rate": 2.175641896278901e-05, "loss": 0.8423, "step": 1198 }, { "epoch": 0.47783202152037463, "grad_norm": 0.31504833020024914, "learning_rate": 2.1731210520491365e-05, "loss": 0.7956, "step": 1199 }, { "epoch": 0.4782305469761881, "grad_norm": 0.27602156434261366, "learning_rate": 2.1705999306813378e-05, "loss": 0.7789, "step": 1200 }, { "epoch": 0.4786290724320016, "grad_norm": 0.3159340649254405, "learning_rate": 2.168078536211403e-05, "loss": 0.8196, "step": 1201 }, { "epoch": 0.47902759788781507, "grad_norm": 0.30368372482852835, "learning_rate": 2.1655568726756643e-05, "loss": 0.8199, "step": 1202 }, { "epoch": 0.47942612334362855, "grad_norm": 0.3082856381822439, "learning_rate": 2.163034944110886e-05, "loss": 0.8217, "step": 1203 }, { "epoch": 0.4798246487994421, "grad_norm": 0.30444993184134234, "learning_rate": 2.1605127545542572e-05, "loss": 0.81, "step": 1204 }, { "epoch": 0.48022317425525557, "grad_norm": 0.3053503071698002, "learning_rate": 2.1579903080433837e-05, "loss": 0.7724, "step": 1205 }, { "epoch": 0.48062169971106905, "grad_norm": 0.2907609764564475, "learning_rate": 2.1554676086162827e-05, "loss": 0.7939, "step": 1206 }, { "epoch": 0.48102022516688253, "grad_norm": 0.30438913548426777, "learning_rate": 2.152944660311378e-05, "loss": 0.8124, "step": 1207 }, { "epoch": 0.481418750622696, "grad_norm": 0.2916803784401073, "learning_rate": 2.1504214671674903e-05, "loss": 0.8002, "step": 1208 }, { "epoch": 0.4818172760785095, "grad_norm": 0.3118580484823128, "learning_rate": 2.147898033223831e-05, "loss": 0.8152, "step": 1209 }, { "epoch": 0.48221580153432303, "grad_norm": 0.30562499279688954, "learning_rate": 2.1453743625200004e-05, "loss": 0.7978, "step": 1210 }, { "epoch": 0.4826143269901365, "grad_norm": 0.29452400424891173, "learning_rate": 2.142850459095975e-05, "loss": 0.8083, "step": 1211 }, { "epoch": 0.48301285244595, "grad_norm": 0.29629314495355424, "learning_rate": 2.1403263269921046e-05, "loss": 0.8073, "step": 1212 }, { "epoch": 0.4834113779017635, "grad_norm": 0.32650392294542924, "learning_rate": 2.1378019702491054e-05, "loss": 0.7924, "step": 1213 }, { "epoch": 0.48380990335757695, "grad_norm": 0.30150320495591154, "learning_rate": 2.135277392908053e-05, "loss": 0.8531, "step": 1214 }, { "epoch": 0.48420842881339043, "grad_norm": 0.310619189184776, "learning_rate": 2.132752599010376e-05, "loss": 0.834, "step": 1215 }, { "epoch": 0.48460695426920397, "grad_norm": 0.32562782034606635, "learning_rate": 2.1302275925978508e-05, "loss": 0.7904, "step": 1216 }, { "epoch": 0.48500547972501745, "grad_norm": 0.3017176154191894, "learning_rate": 2.1277023777125915e-05, "loss": 0.8194, "step": 1217 }, { "epoch": 0.48540400518083093, "grad_norm": 0.32023476312765164, "learning_rate": 2.1251769583970484e-05, "loss": 0.7893, "step": 1218 }, { "epoch": 0.4858025306366444, "grad_norm": 0.28781956877783055, "learning_rate": 2.122651338693998e-05, "loss": 0.8156, "step": 1219 }, { "epoch": 0.4862010560924579, "grad_norm": 1.2149233879740187, "learning_rate": 2.1201255226465375e-05, "loss": 0.8266, "step": 1220 }, { "epoch": 0.4865995815482714, "grad_norm": 0.2992816242260791, "learning_rate": 2.1175995142980793e-05, "loss": 0.8263, "step": 1221 }, { "epoch": 0.4869981070040849, "grad_norm": 0.3235204400431873, "learning_rate": 2.115073317692342e-05, "loss": 0.8074, "step": 1222 }, { "epoch": 0.4873966324598984, "grad_norm": 0.2995871348511909, "learning_rate": 2.112546936873347e-05, "loss": 0.8347, "step": 1223 }, { "epoch": 0.4877951579157119, "grad_norm": 0.3268455050694444, "learning_rate": 2.110020375885411e-05, "loss": 0.8104, "step": 1224 }, { "epoch": 0.48819368337152536, "grad_norm": 0.31345643601355155, "learning_rate": 2.1074936387731367e-05, "loss": 0.8271, "step": 1225 }, { "epoch": 0.48859220882733884, "grad_norm": 0.37781746616538014, "learning_rate": 2.1049667295814113e-05, "loss": 0.8276, "step": 1226 }, { "epoch": 0.4889907342831523, "grad_norm": 0.30667467990270375, "learning_rate": 2.1024396523553955e-05, "loss": 0.7966, "step": 1227 }, { "epoch": 0.48938925973896585, "grad_norm": 0.3116435731085305, "learning_rate": 2.099912411140521e-05, "loss": 0.801, "step": 1228 }, { "epoch": 0.48978778519477933, "grad_norm": 0.3045824871287522, "learning_rate": 2.0973850099824807e-05, "loss": 0.76, "step": 1229 }, { "epoch": 0.4901863106505928, "grad_norm": 0.32180564748889195, "learning_rate": 2.094857452927224e-05, "loss": 0.8158, "step": 1230 }, { "epoch": 0.4905848361064063, "grad_norm": 0.9714532194362665, "learning_rate": 2.09232974402095e-05, "loss": 0.7917, "step": 1231 }, { "epoch": 0.4909833615622198, "grad_norm": 0.4083517397563029, "learning_rate": 2.089801887310099e-05, "loss": 0.7759, "step": 1232 }, { "epoch": 0.49138188701803326, "grad_norm": 0.32375580190481257, "learning_rate": 2.087273886841351e-05, "loss": 0.8225, "step": 1233 }, { "epoch": 0.49178041247384674, "grad_norm": 0.29897291559360073, "learning_rate": 2.0847457466616135e-05, "loss": 0.8223, "step": 1234 }, { "epoch": 0.4921789379296603, "grad_norm": 0.6264426925966912, "learning_rate": 2.08221747081802e-05, "loss": 0.806, "step": 1235 }, { "epoch": 0.49257746338547376, "grad_norm": 0.3393552807659732, "learning_rate": 2.079689063357919e-05, "loss": 0.808, "step": 1236 }, { "epoch": 0.49297598884128724, "grad_norm": 0.48064261120943, "learning_rate": 2.0771605283288716e-05, "loss": 0.8028, "step": 1237 }, { "epoch": 0.4933745142971007, "grad_norm": 0.30581132700814045, "learning_rate": 2.074631869778641e-05, "loss": 0.8067, "step": 1238 }, { "epoch": 0.4937730397529142, "grad_norm": 0.29530312754650695, "learning_rate": 2.0721030917551905e-05, "loss": 0.8212, "step": 1239 }, { "epoch": 0.4941715652087277, "grad_norm": 0.29055485043935136, "learning_rate": 2.0695741983066724e-05, "loss": 0.8193, "step": 1240 }, { "epoch": 0.4945700906645412, "grad_norm": 0.31170603570838856, "learning_rate": 2.0670451934814252e-05, "loss": 0.7959, "step": 1241 }, { "epoch": 0.4949686161203547, "grad_norm": 0.28393384738922395, "learning_rate": 2.0645160813279657e-05, "loss": 0.8113, "step": 1242 }, { "epoch": 0.4953671415761682, "grad_norm": 0.31099237786422546, "learning_rate": 2.0619868658949818e-05, "loss": 0.8277, "step": 1243 }, { "epoch": 0.49576566703198166, "grad_norm": 0.4543341488542098, "learning_rate": 2.059457551231327e-05, "loss": 0.8053, "step": 1244 }, { "epoch": 0.49616419248779514, "grad_norm": 0.3934508739825585, "learning_rate": 2.0569281413860147e-05, "loss": 0.821, "step": 1245 }, { "epoch": 0.4965627179436086, "grad_norm": 0.3041220289880547, "learning_rate": 2.054398640408208e-05, "loss": 0.7835, "step": 1246 }, { "epoch": 0.49696124339942216, "grad_norm": 0.3121481686636135, "learning_rate": 2.0518690523472182e-05, "loss": 0.8196, "step": 1247 }, { "epoch": 0.49735976885523564, "grad_norm": 0.29339385739102847, "learning_rate": 2.0493393812524967e-05, "loss": 0.812, "step": 1248 }, { "epoch": 0.4977582943110491, "grad_norm": 0.6381668064023208, "learning_rate": 2.0468096311736247e-05, "loss": 0.8051, "step": 1249 }, { "epoch": 0.4981568197668626, "grad_norm": 0.30166068852688105, "learning_rate": 2.044279806160313e-05, "loss": 0.787, "step": 1250 }, { "epoch": 0.4985553452226761, "grad_norm": 0.28274022253823955, "learning_rate": 2.0417499102623903e-05, "loss": 0.8003, "step": 1251 }, { "epoch": 0.49895387067848956, "grad_norm": 0.3796924292206021, "learning_rate": 2.0392199475297995e-05, "loss": 0.7982, "step": 1252 }, { "epoch": 0.4993523961343031, "grad_norm": 0.2853722232096178, "learning_rate": 2.0366899220125903e-05, "loss": 0.8013, "step": 1253 }, { "epoch": 0.4997509215901166, "grad_norm": 0.31573490109402036, "learning_rate": 2.034159837760914e-05, "loss": 0.8147, "step": 1254 }, { "epoch": 0.50014944704593, "grad_norm": 0.2765481712079679, "learning_rate": 2.0316296988250138e-05, "loss": 0.7995, "step": 1255 }, { "epoch": 0.5005479725017435, "grad_norm": 0.2994449499838975, "learning_rate": 2.029099509255223e-05, "loss": 0.7946, "step": 1256 }, { "epoch": 0.5009464979575571, "grad_norm": 0.3207532131664091, "learning_rate": 2.026569273101954e-05, "loss": 0.8038, "step": 1257 }, { "epoch": 0.5013450234133705, "grad_norm": 0.2829753955420768, "learning_rate": 2.0240389944156937e-05, "loss": 0.8001, "step": 1258 }, { "epoch": 0.501743548869184, "grad_norm": 0.27998354424049926, "learning_rate": 2.021508677246999e-05, "loss": 0.791, "step": 1259 }, { "epoch": 0.5021420743249975, "grad_norm": 0.2913911881200998, "learning_rate": 2.018978325646486e-05, "loss": 0.7914, "step": 1260 }, { "epoch": 0.502540599780811, "grad_norm": 0.26963096722494334, "learning_rate": 2.0164479436648272e-05, "loss": 0.8406, "step": 1261 }, { "epoch": 0.5029391252366245, "grad_norm": 0.3010795830435557, "learning_rate": 2.0139175353527446e-05, "loss": 0.8078, "step": 1262 }, { "epoch": 0.503337650692438, "grad_norm": 0.30960536952730017, "learning_rate": 2.0113871047610016e-05, "loss": 0.8074, "step": 1263 }, { "epoch": 0.5037361761482515, "grad_norm": 0.26906634414413455, "learning_rate": 2.0088566559403953e-05, "loss": 0.7935, "step": 1264 }, { "epoch": 0.5041347016040649, "grad_norm": 0.34646731409844644, "learning_rate": 2.006326192941755e-05, "loss": 0.8442, "step": 1265 }, { "epoch": 0.5045332270598785, "grad_norm": 0.2726972871873017, "learning_rate": 2.003795719815931e-05, "loss": 0.7859, "step": 1266 }, { "epoch": 0.5049317525156919, "grad_norm": 0.3143394544398179, "learning_rate": 2.0012652406137903e-05, "loss": 0.8307, "step": 1267 }, { "epoch": 0.5053302779715054, "grad_norm": 0.2631801881501474, "learning_rate": 1.99873475938621e-05, "loss": 0.7999, "step": 1268 }, { "epoch": 0.505728803427319, "grad_norm": 0.34508087706819923, "learning_rate": 1.9962042801840698e-05, "loss": 0.8091, "step": 1269 }, { "epoch": 0.5061273288831324, "grad_norm": 0.27438242812890384, "learning_rate": 1.9936738070582455e-05, "loss": 0.798, "step": 1270 }, { "epoch": 0.5065258543389459, "grad_norm": 0.3025634657688614, "learning_rate": 1.991143344059605e-05, "loss": 0.7952, "step": 1271 }, { "epoch": 0.5069243797947593, "grad_norm": 0.2845789431308592, "learning_rate": 1.988612895238999e-05, "loss": 0.8374, "step": 1272 }, { "epoch": 0.5073229052505729, "grad_norm": 0.30248035578518695, "learning_rate": 1.986082464647255e-05, "loss": 0.7864, "step": 1273 }, { "epoch": 0.5077214307063864, "grad_norm": 0.2950710488906475, "learning_rate": 1.9835520563351735e-05, "loss": 0.8288, "step": 1274 }, { "epoch": 0.5081199561621998, "grad_norm": 0.26824757799025784, "learning_rate": 1.9810216743535146e-05, "loss": 0.8364, "step": 1275 }, { "epoch": 0.5085184816180134, "grad_norm": 0.2849419128102798, "learning_rate": 1.9784913227530024e-05, "loss": 0.8236, "step": 1276 }, { "epoch": 0.5089170070738268, "grad_norm": 0.3103889603819969, "learning_rate": 1.975961005584307e-05, "loss": 0.8136, "step": 1277 }, { "epoch": 0.5093155325296403, "grad_norm": 0.6303290018451543, "learning_rate": 1.9734307268980467e-05, "loss": 0.8311, "step": 1278 }, { "epoch": 0.5097140579854538, "grad_norm": 0.32732619537234586, "learning_rate": 1.9709004907447774e-05, "loss": 0.8221, "step": 1279 }, { "epoch": 0.5101125834412673, "grad_norm": 0.28339108969670607, "learning_rate": 1.9683703011749862e-05, "loss": 0.7966, "step": 1280 }, { "epoch": 0.5105111088970808, "grad_norm": 0.3203578395612973, "learning_rate": 1.965840162239087e-05, "loss": 0.8137, "step": 1281 }, { "epoch": 0.5109096343528943, "grad_norm": 0.27176747745707136, "learning_rate": 1.96331007798741e-05, "loss": 0.8078, "step": 1282 }, { "epoch": 0.5113081598087078, "grad_norm": 0.29516820299549673, "learning_rate": 1.9607800524702015e-05, "loss": 0.8209, "step": 1283 }, { "epoch": 0.5117066852645212, "grad_norm": 0.26212656038325677, "learning_rate": 1.9582500897376104e-05, "loss": 0.8141, "step": 1284 }, { "epoch": 0.5121052107203348, "grad_norm": 0.28250007105261504, "learning_rate": 1.955720193839687e-05, "loss": 0.8278, "step": 1285 }, { "epoch": 0.5125037361761483, "grad_norm": 0.26685759222958566, "learning_rate": 1.953190368826376e-05, "loss": 0.8339, "step": 1286 }, { "epoch": 0.5129022616319617, "grad_norm": 0.29022333673533535, "learning_rate": 1.9506606187475036e-05, "loss": 0.8315, "step": 1287 }, { "epoch": 0.5133007870877753, "grad_norm": 0.2670289567076886, "learning_rate": 1.9481309476527825e-05, "loss": 0.801, "step": 1288 }, { "epoch": 0.5136993125435887, "grad_norm": 0.289510280019879, "learning_rate": 1.9456013595917928e-05, "loss": 0.812, "step": 1289 }, { "epoch": 0.5140978379994022, "grad_norm": 0.3128114319953551, "learning_rate": 1.9430718586139863e-05, "loss": 0.8095, "step": 1290 }, { "epoch": 0.5144963634552157, "grad_norm": 0.2888978962753298, "learning_rate": 1.9405424487686732e-05, "loss": 0.79, "step": 1291 }, { "epoch": 0.5148948889110292, "grad_norm": 0.30521651593807825, "learning_rate": 1.9380131341050185e-05, "loss": 0.8137, "step": 1292 }, { "epoch": 0.5152934143668427, "grad_norm": 0.2722787387877988, "learning_rate": 1.935483918672035e-05, "loss": 0.8291, "step": 1293 }, { "epoch": 0.5156919398226562, "grad_norm": 0.2863692337341115, "learning_rate": 1.932954806518575e-05, "loss": 0.7981, "step": 1294 }, { "epoch": 0.5160904652784697, "grad_norm": 0.2759777323624655, "learning_rate": 1.9304258016933282e-05, "loss": 0.8272, "step": 1295 }, { "epoch": 0.5164889907342831, "grad_norm": 0.27713843658608434, "learning_rate": 1.92789690824481e-05, "loss": 0.8079, "step": 1296 }, { "epoch": 0.5168875161900967, "grad_norm": 0.2877077017647955, "learning_rate": 1.92536813022136e-05, "loss": 0.7918, "step": 1297 }, { "epoch": 0.5172860416459101, "grad_norm": 0.28949094300241585, "learning_rate": 1.9228394716711288e-05, "loss": 0.7969, "step": 1298 }, { "epoch": 0.5176845671017236, "grad_norm": 0.29697989743375497, "learning_rate": 1.9203109366420812e-05, "loss": 0.7928, "step": 1299 }, { "epoch": 0.5180830925575372, "grad_norm": 0.27889648874882045, "learning_rate": 1.917782529181981e-05, "loss": 0.8233, "step": 1300 }, { "epoch": 0.5184816180133506, "grad_norm": 0.3023364181088352, "learning_rate": 1.9152542533383872e-05, "loss": 0.8312, "step": 1301 }, { "epoch": 0.5188801434691641, "grad_norm": 0.28357607259449, "learning_rate": 1.9127261131586503e-05, "loss": 0.7801, "step": 1302 }, { "epoch": 0.5192786689249775, "grad_norm": 0.2869887242640123, "learning_rate": 1.910198112689902e-05, "loss": 0.7965, "step": 1303 }, { "epoch": 0.5196771943807911, "grad_norm": 0.28743333022015244, "learning_rate": 1.9076702559790514e-05, "loss": 0.8146, "step": 1304 }, { "epoch": 0.5200757198366046, "grad_norm": 0.284017183782701, "learning_rate": 1.9051425470727766e-05, "loss": 0.7865, "step": 1305 }, { "epoch": 0.520474245292418, "grad_norm": 0.29268712998816515, "learning_rate": 1.9026149900175193e-05, "loss": 0.7996, "step": 1306 }, { "epoch": 0.5208727707482316, "grad_norm": 0.28999045805168566, "learning_rate": 1.9000875888594792e-05, "loss": 0.849, "step": 1307 }, { "epoch": 0.521271296204045, "grad_norm": 0.30459398540455407, "learning_rate": 1.8975603476446048e-05, "loss": 0.7935, "step": 1308 }, { "epoch": 0.5216698216598585, "grad_norm": 0.2838650093705641, "learning_rate": 1.89503327041859e-05, "loss": 0.8034, "step": 1309 }, { "epoch": 0.522068347115672, "grad_norm": 0.304766254772995, "learning_rate": 1.8925063612268637e-05, "loss": 0.846, "step": 1310 }, { "epoch": 0.5224668725714855, "grad_norm": 0.27645008842126473, "learning_rate": 1.8899796241145903e-05, "loss": 0.8269, "step": 1311 }, { "epoch": 0.522865398027299, "grad_norm": 0.2952376158549396, "learning_rate": 1.8874530631266536e-05, "loss": 0.8369, "step": 1312 }, { "epoch": 0.5232639234831125, "grad_norm": 0.33296861229967156, "learning_rate": 1.8849266823076578e-05, "loss": 0.8134, "step": 1313 }, { "epoch": 0.523662448938926, "grad_norm": 0.2866595965213398, "learning_rate": 1.8824004857019217e-05, "loss": 0.8192, "step": 1314 }, { "epoch": 0.5240609743947394, "grad_norm": 0.4924611590945922, "learning_rate": 1.879874477353463e-05, "loss": 0.7903, "step": 1315 }, { "epoch": 0.524459499850553, "grad_norm": 0.27677116299415827, "learning_rate": 1.877348661306003e-05, "loss": 0.8102, "step": 1316 }, { "epoch": 0.5248580253063665, "grad_norm": 0.28883962158261584, "learning_rate": 1.8748230416029522e-05, "loss": 0.7984, "step": 1317 }, { "epoch": 0.5252565507621799, "grad_norm": 0.281009978014599, "learning_rate": 1.8722976222874095e-05, "loss": 0.8045, "step": 1318 }, { "epoch": 0.5256550762179935, "grad_norm": 0.3095342467124618, "learning_rate": 1.8697724074021502e-05, "loss": 0.767, "step": 1319 }, { "epoch": 0.5260536016738069, "grad_norm": 0.29319346123143347, "learning_rate": 1.8672474009896242e-05, "loss": 0.8372, "step": 1320 }, { "epoch": 0.5264521271296204, "grad_norm": 0.47782909290265757, "learning_rate": 1.8647226070919474e-05, "loss": 0.8488, "step": 1321 }, { "epoch": 0.5268506525854338, "grad_norm": 0.3110245262948928, "learning_rate": 1.862198029750895e-05, "loss": 0.7963, "step": 1322 }, { "epoch": 0.5272491780412474, "grad_norm": 0.2917881624752996, "learning_rate": 1.8596736730078967e-05, "loss": 0.7952, "step": 1323 }, { "epoch": 0.5276477034970609, "grad_norm": 0.33165379448294435, "learning_rate": 1.857149540904026e-05, "loss": 0.8076, "step": 1324 }, { "epoch": 0.5280462289528743, "grad_norm": 0.4239553010821896, "learning_rate": 1.8546256374800006e-05, "loss": 0.8028, "step": 1325 }, { "epoch": 0.5284447544086879, "grad_norm": 0.2734465341467207, "learning_rate": 1.8521019667761697e-05, "loss": 0.794, "step": 1326 }, { "epoch": 0.5288432798645013, "grad_norm": 0.2629858746393782, "learning_rate": 1.8495785328325104e-05, "loss": 0.8112, "step": 1327 }, { "epoch": 0.5292418053203148, "grad_norm": 0.28632746629019823, "learning_rate": 1.8470553396886222e-05, "loss": 0.8052, "step": 1328 }, { "epoch": 0.5296403307761284, "grad_norm": 0.2693728963637755, "learning_rate": 1.8445323913837173e-05, "loss": 0.797, "step": 1329 }, { "epoch": 0.5300388562319418, "grad_norm": 0.29114792078325186, "learning_rate": 1.8420096919566173e-05, "loss": 0.8199, "step": 1330 }, { "epoch": 0.5304373816877553, "grad_norm": 0.2806667770430771, "learning_rate": 1.8394872454457434e-05, "loss": 0.7832, "step": 1331 }, { "epoch": 0.5308359071435688, "grad_norm": 0.28182635320788874, "learning_rate": 1.836965055889115e-05, "loss": 0.7998, "step": 1332 }, { "epoch": 0.5312344325993823, "grad_norm": 0.3254325490129574, "learning_rate": 1.8344431273243364e-05, "loss": 0.8112, "step": 1333 }, { "epoch": 0.5316329580551957, "grad_norm": 0.29483982391186925, "learning_rate": 1.8319214637885975e-05, "loss": 0.8025, "step": 1334 }, { "epoch": 0.5320314835110093, "grad_norm": 0.2552432370606682, "learning_rate": 1.829400069318663e-05, "loss": 0.7978, "step": 1335 }, { "epoch": 0.5324300089668228, "grad_norm": 0.2923821069068519, "learning_rate": 1.826878947950864e-05, "loss": 0.7833, "step": 1336 }, { "epoch": 0.5328285344226362, "grad_norm": 0.26602672952480433, "learning_rate": 1.8243581037211005e-05, "loss": 0.7893, "step": 1337 }, { "epoch": 0.5332270598784498, "grad_norm": 0.26880063097474627, "learning_rate": 1.821837540664822e-05, "loss": 0.7862, "step": 1338 }, { "epoch": 0.5336255853342632, "grad_norm": 0.2708329335402036, "learning_rate": 1.8193172628170324e-05, "loss": 0.8108, "step": 1339 }, { "epoch": 0.5340241107900767, "grad_norm": 0.28229072758383317, "learning_rate": 1.8167972742122773e-05, "loss": 0.8675, "step": 1340 }, { "epoch": 0.5344226362458903, "grad_norm": 0.2741069117172231, "learning_rate": 1.81427757888464e-05, "loss": 0.8261, "step": 1341 }, { "epoch": 0.5348211617017037, "grad_norm": 0.27606049985568326, "learning_rate": 1.811758180867734e-05, "loss": 0.8128, "step": 1342 }, { "epoch": 0.5352196871575172, "grad_norm": 0.27575883416758074, "learning_rate": 1.8092390841946964e-05, "loss": 0.7975, "step": 1343 }, { "epoch": 0.5356182126133306, "grad_norm": 0.27470419217590547, "learning_rate": 1.8067202928981827e-05, "loss": 0.801, "step": 1344 }, { "epoch": 0.5360167380691442, "grad_norm": 0.2682028369114076, "learning_rate": 1.804201811010359e-05, "loss": 0.7992, "step": 1345 }, { "epoch": 0.5364152635249576, "grad_norm": 0.29741163933246206, "learning_rate": 1.8016836425628972e-05, "loss": 0.7863, "step": 1346 }, { "epoch": 0.5368137889807711, "grad_norm": 0.2879307582320043, "learning_rate": 1.7991657915869646e-05, "loss": 0.7912, "step": 1347 }, { "epoch": 0.5372123144365847, "grad_norm": 0.26970303031329906, "learning_rate": 1.7966482621132227e-05, "loss": 0.83, "step": 1348 }, { "epoch": 0.5376108398923981, "grad_norm": 0.2899878970961642, "learning_rate": 1.7941310581718197e-05, "loss": 0.8143, "step": 1349 }, { "epoch": 0.5380093653482116, "grad_norm": 0.2749787514839584, "learning_rate": 1.7916141837923787e-05, "loss": 0.7954, "step": 1350 }, { "epoch": 0.5384078908040251, "grad_norm": 0.27467702468985844, "learning_rate": 1.7890976430039982e-05, "loss": 0.7982, "step": 1351 }, { "epoch": 0.5388064162598386, "grad_norm": 0.2618705303695261, "learning_rate": 1.786581439835241e-05, "loss": 0.8195, "step": 1352 }, { "epoch": 0.5392049417156521, "grad_norm": 0.2714594323337975, "learning_rate": 1.7840655783141313e-05, "loss": 0.796, "step": 1353 }, { "epoch": 0.5396034671714656, "grad_norm": 0.28811188495556306, "learning_rate": 1.7815500624681444e-05, "loss": 0.7994, "step": 1354 }, { "epoch": 0.5400019926272791, "grad_norm": 0.2720623478220906, "learning_rate": 1.779034896324204e-05, "loss": 0.8153, "step": 1355 }, { "epoch": 0.5404005180830925, "grad_norm": 0.26375062989547793, "learning_rate": 1.7765200839086722e-05, "loss": 0.8091, "step": 1356 }, { "epoch": 0.5407990435389061, "grad_norm": 0.2692041660964484, "learning_rate": 1.774005629247346e-05, "loss": 0.8079, "step": 1357 }, { "epoch": 0.5411975689947195, "grad_norm": 0.26724517612106163, "learning_rate": 1.77149153636545e-05, "loss": 0.8255, "step": 1358 }, { "epoch": 0.541596094450533, "grad_norm": 0.2767757640601006, "learning_rate": 1.7689778092876276e-05, "loss": 0.7899, "step": 1359 }, { "epoch": 0.5419946199063466, "grad_norm": 0.4018120080677502, "learning_rate": 1.7664644520379398e-05, "loss": 0.8113, "step": 1360 }, { "epoch": 0.54239314536216, "grad_norm": 0.31258004159467684, "learning_rate": 1.7639514686398537e-05, "loss": 0.8172, "step": 1361 }, { "epoch": 0.5427916708179735, "grad_norm": 0.3470011840822337, "learning_rate": 1.7614388631162365e-05, "loss": 0.7933, "step": 1362 }, { "epoch": 0.543190196273787, "grad_norm": 0.3043763377673315, "learning_rate": 1.758926639489354e-05, "loss": 0.8135, "step": 1363 }, { "epoch": 0.5435887217296005, "grad_norm": 0.2923964849291302, "learning_rate": 1.7564148017808578e-05, "loss": 0.7818, "step": 1364 }, { "epoch": 0.5439872471854139, "grad_norm": 0.3065609901064694, "learning_rate": 1.753903354011783e-05, "loss": 0.8423, "step": 1365 }, { "epoch": 0.5443857726412275, "grad_norm": 0.2985623055209066, "learning_rate": 1.751392300202539e-05, "loss": 0.8157, "step": 1366 }, { "epoch": 0.544784298097041, "grad_norm": 0.2786406179918027, "learning_rate": 1.7488816443729066e-05, "loss": 0.8133, "step": 1367 }, { "epoch": 0.5451828235528544, "grad_norm": 0.30926673491457163, "learning_rate": 1.746371390542029e-05, "loss": 0.8133, "step": 1368 }, { "epoch": 0.545581349008668, "grad_norm": 0.2641540209794052, "learning_rate": 1.743861542728404e-05, "loss": 0.7962, "step": 1369 }, { "epoch": 0.5459798744644814, "grad_norm": 0.29034836879196485, "learning_rate": 1.7413521049498823e-05, "loss": 0.8176, "step": 1370 }, { "epoch": 0.5463783999202949, "grad_norm": 0.2768072644524204, "learning_rate": 1.7388430812236556e-05, "loss": 0.7693, "step": 1371 }, { "epoch": 0.5467769253761084, "grad_norm": 0.2769206801693697, "learning_rate": 1.7363344755662555e-05, "loss": 0.8047, "step": 1372 }, { "epoch": 0.5471754508319219, "grad_norm": 0.36766327627843176, "learning_rate": 1.733826291993541e-05, "loss": 0.8223, "step": 1373 }, { "epoch": 0.5475739762877354, "grad_norm": 0.40226420420015246, "learning_rate": 1.7313185345206968e-05, "loss": 0.7996, "step": 1374 }, { "epoch": 0.5479725017435488, "grad_norm": 0.2964909563746245, "learning_rate": 1.728811207162228e-05, "loss": 0.809, "step": 1375 }, { "epoch": 0.5483710271993624, "grad_norm": 0.28906435974471956, "learning_rate": 1.7263043139319476e-05, "loss": 0.755, "step": 1376 }, { "epoch": 0.5487695526551758, "grad_norm": 0.292890255157397, "learning_rate": 1.7237978588429753e-05, "loss": 0.8009, "step": 1377 }, { "epoch": 0.5491680781109893, "grad_norm": 0.2781433781639577, "learning_rate": 1.721291845907729e-05, "loss": 0.7944, "step": 1378 }, { "epoch": 0.5495666035668029, "grad_norm": 0.2937009571551766, "learning_rate": 1.7187862791379198e-05, "loss": 0.8135, "step": 1379 }, { "epoch": 0.5499651290226163, "grad_norm": 0.2912565732468286, "learning_rate": 1.7162811625445423e-05, "loss": 0.8388, "step": 1380 }, { "epoch": 0.5503636544784298, "grad_norm": 0.2748876016189558, "learning_rate": 1.7137765001378724e-05, "loss": 0.836, "step": 1381 }, { "epoch": 0.5507621799342433, "grad_norm": 0.3163822046309509, "learning_rate": 1.711272295927459e-05, "loss": 0.8288, "step": 1382 }, { "epoch": 0.5511607053900568, "grad_norm": 0.27254752708037466, "learning_rate": 1.7087685539221162e-05, "loss": 0.8161, "step": 1383 }, { "epoch": 0.5515592308458703, "grad_norm": 0.3125729789680171, "learning_rate": 1.70626527812992e-05, "loss": 0.8181, "step": 1384 }, { "epoch": 0.5519577563016838, "grad_norm": 0.29916353607545526, "learning_rate": 1.703762472558196e-05, "loss": 0.776, "step": 1385 }, { "epoch": 0.5523562817574973, "grad_norm": 0.333298444535358, "learning_rate": 1.7012601412135237e-05, "loss": 0.8271, "step": 1386 }, { "epoch": 0.5527548072133107, "grad_norm": 0.26574557176935226, "learning_rate": 1.6987582881017173e-05, "loss": 0.7903, "step": 1387 }, { "epoch": 0.5531533326691243, "grad_norm": 0.30640181668201066, "learning_rate": 1.6962569172278283e-05, "loss": 0.8029, "step": 1388 }, { "epoch": 0.5535518581249377, "grad_norm": 0.27259308701491025, "learning_rate": 1.6937560325961364e-05, "loss": 0.8145, "step": 1389 }, { "epoch": 0.5539503835807512, "grad_norm": 0.29936679527497784, "learning_rate": 1.6912556382101415e-05, "loss": 0.791, "step": 1390 }, { "epoch": 0.5543489090365648, "grad_norm": 0.2708401911735976, "learning_rate": 1.6887557380725602e-05, "loss": 0.8067, "step": 1391 }, { "epoch": 0.5547474344923782, "grad_norm": 0.2744964958311244, "learning_rate": 1.6862563361853165e-05, "loss": 0.8082, "step": 1392 }, { "epoch": 0.5551459599481917, "grad_norm": 0.27774556322816, "learning_rate": 1.6837574365495383e-05, "loss": 0.8201, "step": 1393 }, { "epoch": 0.5555444854040051, "grad_norm": 0.2860333592628782, "learning_rate": 1.6812590431655473e-05, "loss": 0.8132, "step": 1394 }, { "epoch": 0.5559430108598187, "grad_norm": 0.2874026887492097, "learning_rate": 1.678761160032857e-05, "loss": 0.8031, "step": 1395 }, { "epoch": 0.5563415363156322, "grad_norm": 0.28106720251341816, "learning_rate": 1.676263791150164e-05, "loss": 0.8094, "step": 1396 }, { "epoch": 0.5567400617714456, "grad_norm": 0.29522074096111917, "learning_rate": 1.6737669405153388e-05, "loss": 0.794, "step": 1397 }, { "epoch": 0.5571385872272592, "grad_norm": 0.273137049734289, "learning_rate": 1.6712706121254264e-05, "loss": 0.7904, "step": 1398 }, { "epoch": 0.5575371126830726, "grad_norm": 0.2938729039193004, "learning_rate": 1.668774809976632e-05, "loss": 0.8211, "step": 1399 }, { "epoch": 0.5579356381388861, "grad_norm": 0.27893542802339405, "learning_rate": 1.6662795380643212e-05, "loss": 0.7831, "step": 1400 }, { "epoch": 0.5583341635946996, "grad_norm": 0.31771721535476655, "learning_rate": 1.6637848003830086e-05, "loss": 0.78, "step": 1401 }, { "epoch": 0.5587326890505131, "grad_norm": 0.27599058172210705, "learning_rate": 1.6612906009263553e-05, "loss": 0.7996, "step": 1402 }, { "epoch": 0.5591312145063266, "grad_norm": 0.26309299304248956, "learning_rate": 1.6587969436871608e-05, "loss": 0.8273, "step": 1403 }, { "epoch": 0.5595297399621401, "grad_norm": 0.2658663776464135, "learning_rate": 1.6563038326573544e-05, "loss": 0.7803, "step": 1404 }, { "epoch": 0.5599282654179536, "grad_norm": 0.27453871016555076, "learning_rate": 1.6538112718279937e-05, "loss": 0.8192, "step": 1405 }, { "epoch": 0.560326790873767, "grad_norm": 0.30380713206643706, "learning_rate": 1.651319265189254e-05, "loss": 0.7841, "step": 1406 }, { "epoch": 0.5607253163295806, "grad_norm": 0.2745314071899381, "learning_rate": 1.6488278167304243e-05, "loss": 0.7966, "step": 1407 }, { "epoch": 0.5611238417853941, "grad_norm": 0.27106784806374307, "learning_rate": 1.6463369304398976e-05, "loss": 0.782, "step": 1408 }, { "epoch": 0.5615223672412075, "grad_norm": 0.26824801623885447, "learning_rate": 1.6438466103051708e-05, "loss": 0.7975, "step": 1409 }, { "epoch": 0.5619208926970211, "grad_norm": 0.315466445265476, "learning_rate": 1.641356860312833e-05, "loss": 0.8375, "step": 1410 }, { "epoch": 0.5623194181528345, "grad_norm": 0.26586433303215745, "learning_rate": 1.6388676844485583e-05, "loss": 0.7963, "step": 1411 }, { "epoch": 0.562717943608648, "grad_norm": 0.26384331857538773, "learning_rate": 1.636379086697105e-05, "loss": 0.811, "step": 1412 }, { "epoch": 0.5631164690644614, "grad_norm": 0.2743841871460786, "learning_rate": 1.6338910710423034e-05, "loss": 0.7687, "step": 1413 }, { "epoch": 0.563514994520275, "grad_norm": 0.2598827208531272, "learning_rate": 1.6314036414670544e-05, "loss": 0.7926, "step": 1414 }, { "epoch": 0.5639135199760885, "grad_norm": 0.2631333168836199, "learning_rate": 1.6289168019533182e-05, "loss": 0.8233, "step": 1415 }, { "epoch": 0.5643120454319019, "grad_norm": 0.274009439927925, "learning_rate": 1.626430556482112e-05, "loss": 0.8093, "step": 1416 }, { "epoch": 0.5647105708877155, "grad_norm": 0.2815241084799363, "learning_rate": 1.623944909033502e-05, "loss": 0.8386, "step": 1417 }, { "epoch": 0.5651090963435289, "grad_norm": 0.2693426340478129, "learning_rate": 1.621459863586596e-05, "loss": 0.7934, "step": 1418 }, { "epoch": 0.5655076217993424, "grad_norm": 0.28640728418548206, "learning_rate": 1.61897542411954e-05, "loss": 0.7605, "step": 1419 }, { "epoch": 0.565906147255156, "grad_norm": 0.28566808429395685, "learning_rate": 1.6164915946095063e-05, "loss": 0.7836, "step": 1420 }, { "epoch": 0.5663046727109694, "grad_norm": 0.2703972532532415, "learning_rate": 1.6140083790326963e-05, "loss": 0.8089, "step": 1421 }, { "epoch": 0.5667031981667829, "grad_norm": 0.2792579130299739, "learning_rate": 1.6115257813643227e-05, "loss": 0.8133, "step": 1422 }, { "epoch": 0.5671017236225964, "grad_norm": 0.2729454606681309, "learning_rate": 1.6090438055786123e-05, "loss": 0.8097, "step": 1423 }, { "epoch": 0.5675002490784099, "grad_norm": 0.2915157005944316, "learning_rate": 1.606562455648798e-05, "loss": 0.8078, "step": 1424 }, { "epoch": 0.5678987745342233, "grad_norm": 0.29032778472704807, "learning_rate": 1.6040817355471065e-05, "loss": 0.7931, "step": 1425 }, { "epoch": 0.5682972999900369, "grad_norm": 0.2636401468661431, "learning_rate": 1.601601649244759e-05, "loss": 0.8162, "step": 1426 }, { "epoch": 0.5686958254458504, "grad_norm": 0.288342129461046, "learning_rate": 1.5991222007119614e-05, "loss": 0.831, "step": 1427 }, { "epoch": 0.5690943509016638, "grad_norm": 0.25892278113322154, "learning_rate": 1.5966433939178992e-05, "loss": 0.7956, "step": 1428 }, { "epoch": 0.5694928763574774, "grad_norm": 0.30072057342912867, "learning_rate": 1.5941652328307296e-05, "loss": 0.777, "step": 1429 }, { "epoch": 0.5698914018132908, "grad_norm": 0.26806489233741043, "learning_rate": 1.5916877214175768e-05, "loss": 0.8291, "step": 1430 }, { "epoch": 0.5702899272691043, "grad_norm": 0.2905448743699399, "learning_rate": 1.589210863644525e-05, "loss": 0.8472, "step": 1431 }, { "epoch": 0.5706884527249177, "grad_norm": 0.2982764650867147, "learning_rate": 1.586734663476612e-05, "loss": 0.8144, "step": 1432 }, { "epoch": 0.5710869781807313, "grad_norm": 0.2872873533319639, "learning_rate": 1.584259124877823e-05, "loss": 0.8113, "step": 1433 }, { "epoch": 0.5714855036365448, "grad_norm": 0.29449735325312454, "learning_rate": 1.5817842518110827e-05, "loss": 0.8214, "step": 1434 }, { "epoch": 0.5718840290923582, "grad_norm": 0.39051963343272733, "learning_rate": 1.5793100482382525e-05, "loss": 0.7799, "step": 1435 }, { "epoch": 0.5722825545481718, "grad_norm": 0.2616459809836497, "learning_rate": 1.5768365181201205e-05, "loss": 0.7777, "step": 1436 }, { "epoch": 0.5726810800039852, "grad_norm": 0.28842653622157877, "learning_rate": 1.574363665416398e-05, "loss": 0.7962, "step": 1437 }, { "epoch": 0.5730796054597987, "grad_norm": 0.2641950748942506, "learning_rate": 1.5718914940857114e-05, "loss": 0.7991, "step": 1438 }, { "epoch": 0.5734781309156123, "grad_norm": 0.27488209941925706, "learning_rate": 1.5694200080855952e-05, "loss": 0.7883, "step": 1439 }, { "epoch": 0.5738766563714257, "grad_norm": 0.26045131988579345, "learning_rate": 1.5669492113724888e-05, "loss": 0.7938, "step": 1440 }, { "epoch": 0.5742751818272392, "grad_norm": 0.2974260811653572, "learning_rate": 1.5644791079017263e-05, "loss": 0.8168, "step": 1441 }, { "epoch": 0.5746737072830527, "grad_norm": 0.28973731321680374, "learning_rate": 1.562009701627533e-05, "loss": 0.7946, "step": 1442 }, { "epoch": 0.5750722327388662, "grad_norm": 0.28100822605068104, "learning_rate": 1.5595409965030188e-05, "loss": 0.8041, "step": 1443 }, { "epoch": 0.5754707581946796, "grad_norm": 0.2836905042084171, "learning_rate": 1.557072996480169e-05, "loss": 0.7906, "step": 1444 }, { "epoch": 0.5758692836504932, "grad_norm": 0.265117167660616, "learning_rate": 1.554605705509843e-05, "loss": 0.8415, "step": 1445 }, { "epoch": 0.5762678091063067, "grad_norm": 0.26306772688466995, "learning_rate": 1.5521391275417613e-05, "loss": 0.8292, "step": 1446 }, { "epoch": 0.5766663345621201, "grad_norm": 0.2710950213877723, "learning_rate": 1.5496732665245085e-05, "loss": 0.8231, "step": 1447 }, { "epoch": 0.5770648600179337, "grad_norm": 0.2788906456071625, "learning_rate": 1.5472081264055154e-05, "loss": 0.8116, "step": 1448 }, { "epoch": 0.5774633854737471, "grad_norm": 0.27310715767259724, "learning_rate": 1.5447437111310624e-05, "loss": 0.8271, "step": 1449 }, { "epoch": 0.5778619109295606, "grad_norm": 0.2785035809739301, "learning_rate": 1.5422800246462706e-05, "loss": 0.7981, "step": 1450 }, { "epoch": 0.5782604363853742, "grad_norm": 0.27219975804237134, "learning_rate": 1.5398170708950902e-05, "loss": 0.7965, "step": 1451 }, { "epoch": 0.5786589618411876, "grad_norm": 0.27506447504088605, "learning_rate": 1.5373548538203026e-05, "loss": 0.8201, "step": 1452 }, { "epoch": 0.5790574872970011, "grad_norm": 0.2946170401264071, "learning_rate": 1.5348933773635067e-05, "loss": 0.8128, "step": 1453 }, { "epoch": 0.5794560127528146, "grad_norm": 0.3826815086737385, "learning_rate": 1.532432645465118e-05, "loss": 0.8173, "step": 1454 }, { "epoch": 0.5798545382086281, "grad_norm": 0.2924952233528226, "learning_rate": 1.5299726620643595e-05, "loss": 0.7775, "step": 1455 }, { "epoch": 0.5802530636644415, "grad_norm": 0.2642260239005724, "learning_rate": 1.5275134310992553e-05, "loss": 0.8191, "step": 1456 }, { "epoch": 0.580651589120255, "grad_norm": 0.3149422419473645, "learning_rate": 1.5250549565066262e-05, "loss": 0.7974, "step": 1457 }, { "epoch": 0.5810501145760686, "grad_norm": 0.27490534215380524, "learning_rate": 1.5225972422220804e-05, "loss": 0.804, "step": 1458 }, { "epoch": 0.581448640031882, "grad_norm": 0.2755621466065312, "learning_rate": 1.5201402921800114e-05, "loss": 0.8127, "step": 1459 }, { "epoch": 0.5818471654876956, "grad_norm": 0.3037669691142441, "learning_rate": 1.5176841103135867e-05, "loss": 0.7912, "step": 1460 }, { "epoch": 0.582245690943509, "grad_norm": 0.25177796617384035, "learning_rate": 1.5152287005547458e-05, "loss": 0.8329, "step": 1461 }, { "epoch": 0.5826442163993225, "grad_norm": 0.27341817612876335, "learning_rate": 1.512774066834191e-05, "loss": 0.7794, "step": 1462 }, { "epoch": 0.583042741855136, "grad_norm": 0.2406762714221454, "learning_rate": 1.5103202130813839e-05, "loss": 0.7918, "step": 1463 }, { "epoch": 0.5834412673109495, "grad_norm": 0.28482104897292554, "learning_rate": 1.5078671432245362e-05, "loss": 0.7675, "step": 1464 }, { "epoch": 0.583839792766763, "grad_norm": 0.25741699835096044, "learning_rate": 1.5054148611906047e-05, "loss": 0.7924, "step": 1465 }, { "epoch": 0.5842383182225764, "grad_norm": 0.2920808223289217, "learning_rate": 1.5029633709052864e-05, "loss": 0.8141, "step": 1466 }, { "epoch": 0.58463684367839, "grad_norm": 0.2807331727085593, "learning_rate": 1.5005126762930085e-05, "loss": 0.7992, "step": 1467 }, { "epoch": 0.5850353691342034, "grad_norm": 0.2785002462676359, "learning_rate": 1.4980627812769273e-05, "loss": 0.8283, "step": 1468 }, { "epoch": 0.5854338945900169, "grad_norm": 0.48934793357042067, "learning_rate": 1.4956136897789155e-05, "loss": 0.8011, "step": 1469 }, { "epoch": 0.5858324200458305, "grad_norm": 0.2541832978215571, "learning_rate": 1.4931654057195633e-05, "loss": 0.7957, "step": 1470 }, { "epoch": 0.5862309455016439, "grad_norm": 0.28333216989436416, "learning_rate": 1.4907179330181667e-05, "loss": 0.7933, "step": 1471 }, { "epoch": 0.5866294709574574, "grad_norm": 0.25893200895383417, "learning_rate": 1.4882712755927208e-05, "loss": 0.8324, "step": 1472 }, { "epoch": 0.5870279964132709, "grad_norm": 0.2964360831302451, "learning_rate": 1.4858254373599206e-05, "loss": 0.8116, "step": 1473 }, { "epoch": 0.5874265218690844, "grad_norm": 0.2520201190243798, "learning_rate": 1.4833804222351437e-05, "loss": 0.7728, "step": 1474 }, { "epoch": 0.5878250473248979, "grad_norm": 0.28965585570658003, "learning_rate": 1.4809362341324549e-05, "loss": 0.8301, "step": 1475 }, { "epoch": 0.5882235727807114, "grad_norm": 0.2680016094991912, "learning_rate": 1.478492876964592e-05, "loss": 0.8104, "step": 1476 }, { "epoch": 0.5886220982365249, "grad_norm": 0.29138008709625307, "learning_rate": 1.4760503546429642e-05, "loss": 0.7939, "step": 1477 }, { "epoch": 0.5890206236923383, "grad_norm": 0.27301356294256424, "learning_rate": 1.473608671077644e-05, "loss": 0.8017, "step": 1478 }, { "epoch": 0.5894191491481519, "grad_norm": 0.27632908308241927, "learning_rate": 1.4711678301773607e-05, "loss": 0.7876, "step": 1479 }, { "epoch": 0.5898176746039653, "grad_norm": 0.29739284619714174, "learning_rate": 1.4687278358494954e-05, "loss": 0.8396, "step": 1480 }, { "epoch": 0.5902162000597788, "grad_norm": 0.26373275038816285, "learning_rate": 1.4662886920000727e-05, "loss": 0.7893, "step": 1481 }, { "epoch": 0.5906147255155924, "grad_norm": 0.28819618380315065, "learning_rate": 1.463850402533758e-05, "loss": 0.8096, "step": 1482 }, { "epoch": 0.5910132509714058, "grad_norm": 0.26086188725806075, "learning_rate": 1.4614129713538456e-05, "loss": 0.8272, "step": 1483 }, { "epoch": 0.5914117764272193, "grad_norm": 0.2998087493750338, "learning_rate": 1.4589764023622585e-05, "loss": 0.811, "step": 1484 }, { "epoch": 0.5918103018830327, "grad_norm": 0.28423477916709305, "learning_rate": 1.4565406994595402e-05, "loss": 0.8314, "step": 1485 }, { "epoch": 0.5922088273388463, "grad_norm": 0.4714680189752818, "learning_rate": 1.4541058665448437e-05, "loss": 0.8132, "step": 1486 }, { "epoch": 0.5926073527946598, "grad_norm": 0.2832956819184063, "learning_rate": 1.4516719075159342e-05, "loss": 0.8201, "step": 1487 }, { "epoch": 0.5930058782504732, "grad_norm": 0.280931582487737, "learning_rate": 1.4492388262691737e-05, "loss": 0.8104, "step": 1488 }, { "epoch": 0.5934044037062868, "grad_norm": 0.4352514915841819, "learning_rate": 1.4468066266995222e-05, "loss": 0.7969, "step": 1489 }, { "epoch": 0.5938029291621002, "grad_norm": 0.28686409934998564, "learning_rate": 1.4443753127005264e-05, "loss": 0.7842, "step": 1490 }, { "epoch": 0.5942014546179137, "grad_norm": 0.2569294965760903, "learning_rate": 1.4419448881643158e-05, "loss": 0.8154, "step": 1491 }, { "epoch": 0.5945999800737272, "grad_norm": 0.28382287666623324, "learning_rate": 1.4395153569815974e-05, "loss": 0.8105, "step": 1492 }, { "epoch": 0.5949985055295407, "grad_norm": 0.2572203424982894, "learning_rate": 1.4370867230416451e-05, "loss": 0.7826, "step": 1493 }, { "epoch": 0.5953970309853542, "grad_norm": 6.465506917099715, "learning_rate": 1.4346589902323003e-05, "loss": 0.783, "step": 1494 }, { "epoch": 0.5957955564411677, "grad_norm": 0.39706235846696825, "learning_rate": 1.432232162439957e-05, "loss": 0.8166, "step": 1495 }, { "epoch": 0.5961940818969812, "grad_norm": 0.26404445452409736, "learning_rate": 1.4298062435495661e-05, "loss": 0.7826, "step": 1496 }, { "epoch": 0.5965926073527946, "grad_norm": 0.3308104505575439, "learning_rate": 1.4273812374446183e-05, "loss": 0.795, "step": 1497 }, { "epoch": 0.5969911328086082, "grad_norm": 0.3026458263801191, "learning_rate": 1.4249571480071467e-05, "loss": 0.7715, "step": 1498 }, { "epoch": 0.5973896582644216, "grad_norm": 0.28588534412959155, "learning_rate": 1.4225339791177151e-05, "loss": 0.7987, "step": 1499 }, { "epoch": 0.5977881837202351, "grad_norm": 0.32101230875160675, "learning_rate": 1.4201117346554144e-05, "loss": 0.8046, "step": 1500 }, { "epoch": 0.5981867091760487, "grad_norm": 0.290897264466864, "learning_rate": 1.4176904184978552e-05, "loss": 0.8004, "step": 1501 }, { "epoch": 0.5985852346318621, "grad_norm": 0.3026009841483658, "learning_rate": 1.4152700345211626e-05, "loss": 0.8065, "step": 1502 }, { "epoch": 0.5989837600876756, "grad_norm": 0.44263950851966477, "learning_rate": 1.412850586599969e-05, "loss": 0.8096, "step": 1503 }, { "epoch": 0.599382285543489, "grad_norm": 0.3248532926102643, "learning_rate": 1.4104320786074078e-05, "loss": 0.8377, "step": 1504 }, { "epoch": 0.5997808109993026, "grad_norm": 0.28575595840318735, "learning_rate": 1.408014514415109e-05, "loss": 0.78, "step": 1505 }, { "epoch": 0.6001793364551161, "grad_norm": 0.2794084216593132, "learning_rate": 1.4055978978931919e-05, "loss": 0.784, "step": 1506 }, { "epoch": 0.6005778619109295, "grad_norm": 0.2796315632479643, "learning_rate": 1.4031822329102558e-05, "loss": 0.7991, "step": 1507 }, { "epoch": 0.6009763873667431, "grad_norm": 0.29082183486321656, "learning_rate": 1.4007675233333812e-05, "loss": 0.7593, "step": 1508 }, { "epoch": 0.6013749128225565, "grad_norm": 0.27442890679937104, "learning_rate": 1.3983537730281153e-05, "loss": 0.82, "step": 1509 }, { "epoch": 0.60177343827837, "grad_norm": 0.28240777195387234, "learning_rate": 1.3959409858584718e-05, "loss": 0.7895, "step": 1510 }, { "epoch": 0.6021719637341835, "grad_norm": 0.28640189626735446, "learning_rate": 1.3935291656869216e-05, "loss": 0.8065, "step": 1511 }, { "epoch": 0.602570489189997, "grad_norm": 0.27042843088562313, "learning_rate": 1.3911183163743883e-05, "loss": 0.7875, "step": 1512 }, { "epoch": 0.6029690146458105, "grad_norm": 0.3230930753709, "learning_rate": 1.3887084417802412e-05, "loss": 0.7854, "step": 1513 }, { "epoch": 0.603367540101624, "grad_norm": 0.26957683695591095, "learning_rate": 1.3862995457622883e-05, "loss": 0.8231, "step": 1514 }, { "epoch": 0.6037660655574375, "grad_norm": 0.2814390906832594, "learning_rate": 1.3838916321767726e-05, "loss": 0.8048, "step": 1515 }, { "epoch": 0.6041645910132509, "grad_norm": 0.2654808310179734, "learning_rate": 1.381484704878363e-05, "loss": 0.8074, "step": 1516 }, { "epoch": 0.6045631164690645, "grad_norm": 0.26170541781453055, "learning_rate": 1.379078767720151e-05, "loss": 0.7921, "step": 1517 }, { "epoch": 0.604961641924878, "grad_norm": 0.26340697807382485, "learning_rate": 1.3766738245536403e-05, "loss": 0.7894, "step": 1518 }, { "epoch": 0.6053601673806914, "grad_norm": 0.34917912033176396, "learning_rate": 1.3742698792287467e-05, "loss": 0.7979, "step": 1519 }, { "epoch": 0.605758692836505, "grad_norm": 0.2698143223745579, "learning_rate": 1.371866935593788e-05, "loss": 0.7705, "step": 1520 }, { "epoch": 0.6061572182923184, "grad_norm": 0.25293807015990133, "learning_rate": 1.369464997495475e-05, "loss": 0.7881, "step": 1521 }, { "epoch": 0.6065557437481319, "grad_norm": 0.2713270396836266, "learning_rate": 1.3670640687789139e-05, "loss": 0.7931, "step": 1522 }, { "epoch": 0.6069542692039454, "grad_norm": 0.27034783836116744, "learning_rate": 1.3646641532875911e-05, "loss": 0.7961, "step": 1523 }, { "epoch": 0.6073527946597589, "grad_norm": 0.27490482613460554, "learning_rate": 1.362265254863373e-05, "loss": 0.8147, "step": 1524 }, { "epoch": 0.6077513201155724, "grad_norm": 0.2676216739525722, "learning_rate": 1.3598673773464972e-05, "loss": 0.7853, "step": 1525 }, { "epoch": 0.6081498455713858, "grad_norm": 1.272473309536001, "learning_rate": 1.3574705245755669e-05, "loss": 0.8089, "step": 1526 }, { "epoch": 0.6085483710271994, "grad_norm": 0.6571572306931123, "learning_rate": 1.3550747003875458e-05, "loss": 0.8261, "step": 1527 }, { "epoch": 0.6089468964830128, "grad_norm": 0.2596088192309901, "learning_rate": 1.3526799086177494e-05, "loss": 0.8193, "step": 1528 }, { "epoch": 0.6093454219388263, "grad_norm": 0.26444135616895786, "learning_rate": 1.350286153099842e-05, "loss": 0.7892, "step": 1529 }, { "epoch": 0.6097439473946399, "grad_norm": 0.25937034971149103, "learning_rate": 1.3478934376658273e-05, "loss": 0.8026, "step": 1530 }, { "epoch": 0.6101424728504533, "grad_norm": 0.7431115276392141, "learning_rate": 1.3455017661460464e-05, "loss": 0.7932, "step": 1531 }, { "epoch": 0.6105409983062668, "grad_norm": 0.27995622010695853, "learning_rate": 1.3431111423691677e-05, "loss": 0.7833, "step": 1532 }, { "epoch": 0.6109395237620803, "grad_norm": 0.2464262348021282, "learning_rate": 1.3407215701621812e-05, "loss": 0.796, "step": 1533 }, { "epoch": 0.6113380492178938, "grad_norm": 0.26689725888573773, "learning_rate": 1.3383330533503971e-05, "loss": 0.7984, "step": 1534 }, { "epoch": 0.6117365746737072, "grad_norm": 0.281243345103868, "learning_rate": 1.335945595757432e-05, "loss": 0.8119, "step": 1535 }, { "epoch": 0.6121351001295208, "grad_norm": 0.2807299105795548, "learning_rate": 1.3335592012052096e-05, "loss": 0.8208, "step": 1536 }, { "epoch": 0.6125336255853343, "grad_norm": 0.2822355271519365, "learning_rate": 1.3311738735139502e-05, "loss": 0.7958, "step": 1537 }, { "epoch": 0.6129321510411477, "grad_norm": 0.2570136422498892, "learning_rate": 1.328789616502168e-05, "loss": 0.7798, "step": 1538 }, { "epoch": 0.6133306764969613, "grad_norm": 0.2602381753045998, "learning_rate": 1.3264064339866622e-05, "loss": 0.7952, "step": 1539 }, { "epoch": 0.6137292019527747, "grad_norm": 0.27124645437474926, "learning_rate": 1.3240243297825112e-05, "loss": 0.8447, "step": 1540 }, { "epoch": 0.6141277274085882, "grad_norm": 0.2614506972170479, "learning_rate": 1.3216433077030689e-05, "loss": 0.8067, "step": 1541 }, { "epoch": 0.6145262528644018, "grad_norm": 0.273112140897487, "learning_rate": 1.3192633715599548e-05, "loss": 0.8041, "step": 1542 }, { "epoch": 0.6149247783202152, "grad_norm": 0.24587524256890503, "learning_rate": 1.3168845251630527e-05, "loss": 0.7969, "step": 1543 }, { "epoch": 0.6153233037760287, "grad_norm": 0.2931074811806814, "learning_rate": 1.3145067723204979e-05, "loss": 0.7919, "step": 1544 }, { "epoch": 0.6157218292318422, "grad_norm": 0.23408431837644428, "learning_rate": 1.3121301168386796e-05, "loss": 0.7974, "step": 1545 }, { "epoch": 0.6161203546876557, "grad_norm": 0.2885214636424266, "learning_rate": 1.3097545625222284e-05, "loss": 0.8183, "step": 1546 }, { "epoch": 0.6165188801434691, "grad_norm": 0.2565866864664869, "learning_rate": 1.3073801131740104e-05, "loss": 0.8187, "step": 1547 }, { "epoch": 0.6169174055992827, "grad_norm": 0.3070425063241222, "learning_rate": 1.3050067725951258e-05, "loss": 0.8084, "step": 1548 }, { "epoch": 0.6173159310550962, "grad_norm": 0.3551888980070755, "learning_rate": 1.3026345445848976e-05, "loss": 0.7969, "step": 1549 }, { "epoch": 0.6177144565109096, "grad_norm": 0.3309087361846915, "learning_rate": 1.3002634329408692e-05, "loss": 0.7573, "step": 1550 }, { "epoch": 0.6181129819667232, "grad_norm": 0.2685150964208705, "learning_rate": 1.2978934414587955e-05, "loss": 0.8077, "step": 1551 }, { "epoch": 0.6185115074225366, "grad_norm": 0.28733052685665156, "learning_rate": 1.2955245739326397e-05, "loss": 0.807, "step": 1552 }, { "epoch": 0.6189100328783501, "grad_norm": 0.25727837605034215, "learning_rate": 1.2931568341545649e-05, "loss": 0.8055, "step": 1553 }, { "epoch": 0.6193085583341637, "grad_norm": 0.28129842927276943, "learning_rate": 1.2907902259149287e-05, "loss": 0.8003, "step": 1554 }, { "epoch": 0.6197070837899771, "grad_norm": 0.2650304078824774, "learning_rate": 1.2884247530022786e-05, "loss": 0.7906, "step": 1555 }, { "epoch": 0.6201056092457906, "grad_norm": 0.36649121713601185, "learning_rate": 1.2860604192033414e-05, "loss": 0.7765, "step": 1556 }, { "epoch": 0.620504134701604, "grad_norm": 0.25867200718505207, "learning_rate": 1.2836972283030256e-05, "loss": 0.8186, "step": 1557 }, { "epoch": 0.6209026601574176, "grad_norm": 0.2720817068824379, "learning_rate": 1.2813351840844046e-05, "loss": 0.7753, "step": 1558 }, { "epoch": 0.621301185613231, "grad_norm": 0.27600718946732516, "learning_rate": 1.2789742903287187e-05, "loss": 0.8002, "step": 1559 }, { "epoch": 0.6216997110690445, "grad_norm": 0.26210695686216645, "learning_rate": 1.2766145508153689e-05, "loss": 0.7726, "step": 1560 }, { "epoch": 0.6220982365248581, "grad_norm": 0.27148047901992983, "learning_rate": 1.2742559693219035e-05, "loss": 0.8221, "step": 1561 }, { "epoch": 0.6224967619806715, "grad_norm": 0.2506440715577259, "learning_rate": 1.2718985496240209e-05, "loss": 0.8161, "step": 1562 }, { "epoch": 0.622895287436485, "grad_norm": 0.2562550466452998, "learning_rate": 1.2695422954955569e-05, "loss": 0.812, "step": 1563 }, { "epoch": 0.6232938128922985, "grad_norm": 0.273331861541004, "learning_rate": 1.2671872107084844e-05, "loss": 0.7746, "step": 1564 }, { "epoch": 0.623692338348112, "grad_norm": 0.24027870818880687, "learning_rate": 1.2648332990329016e-05, "loss": 0.783, "step": 1565 }, { "epoch": 0.6240908638039254, "grad_norm": 0.2751061681477381, "learning_rate": 1.2624805642370302e-05, "loss": 0.8006, "step": 1566 }, { "epoch": 0.624489389259739, "grad_norm": 0.2603821217505175, "learning_rate": 1.2601290100872081e-05, "loss": 0.8093, "step": 1567 }, { "epoch": 0.6248879147155525, "grad_norm": 0.3093537763083936, "learning_rate": 1.2577786403478815e-05, "loss": 0.8071, "step": 1568 }, { "epoch": 0.6252864401713659, "grad_norm": 0.25834846435694175, "learning_rate": 1.2554294587816039e-05, "loss": 0.8046, "step": 1569 }, { "epoch": 0.6256849656271795, "grad_norm": 0.2614225968860621, "learning_rate": 1.253081469149022e-05, "loss": 0.809, "step": 1570 }, { "epoch": 0.6260834910829929, "grad_norm": 0.2641571048713672, "learning_rate": 1.2507346752088788e-05, "loss": 0.8151, "step": 1571 }, { "epoch": 0.6264820165388064, "grad_norm": 0.2570556300174585, "learning_rate": 1.2483890807180003e-05, "loss": 0.7807, "step": 1572 }, { "epoch": 0.62688054199462, "grad_norm": 0.25821601421943596, "learning_rate": 1.2460446894312938e-05, "loss": 0.8099, "step": 1573 }, { "epoch": 0.6272790674504334, "grad_norm": 0.2631395054682711, "learning_rate": 1.243701505101741e-05, "loss": 0.8161, "step": 1574 }, { "epoch": 0.6276775929062469, "grad_norm": 0.24766766238334142, "learning_rate": 1.2413595314803892e-05, "loss": 0.7707, "step": 1575 }, { "epoch": 0.6280761183620603, "grad_norm": 0.24707466931883929, "learning_rate": 1.2390187723163503e-05, "loss": 0.804, "step": 1576 }, { "epoch": 0.6284746438178739, "grad_norm": 0.2621376069815184, "learning_rate": 1.2366792313567895e-05, "loss": 0.8055, "step": 1577 }, { "epoch": 0.6288731692736873, "grad_norm": 0.2455537279746612, "learning_rate": 1.2343409123469244e-05, "loss": 0.8099, "step": 1578 }, { "epoch": 0.6292716947295008, "grad_norm": 0.27105059580537544, "learning_rate": 1.232003819030013e-05, "loss": 0.7965, "step": 1579 }, { "epoch": 0.6296702201853144, "grad_norm": 0.24578937265717318, "learning_rate": 1.2296679551473551e-05, "loss": 0.7871, "step": 1580 }, { "epoch": 0.6300687456411278, "grad_norm": 0.24084765272449513, "learning_rate": 1.227333324438281e-05, "loss": 0.7965, "step": 1581 }, { "epoch": 0.6304672710969413, "grad_norm": 0.23922572705746703, "learning_rate": 1.2249999306401445e-05, "loss": 0.7936, "step": 1582 }, { "epoch": 0.6308657965527548, "grad_norm": 0.269202817136775, "learning_rate": 1.2226677774883236e-05, "loss": 0.8134, "step": 1583 }, { "epoch": 0.6312643220085683, "grad_norm": 0.24194081424246755, "learning_rate": 1.2203368687162058e-05, "loss": 0.8036, "step": 1584 }, { "epoch": 0.6316628474643818, "grad_norm": 0.2606593476377602, "learning_rate": 1.2180072080551899e-05, "loss": 0.8057, "step": 1585 }, { "epoch": 0.6320613729201953, "grad_norm": 0.25284920681339745, "learning_rate": 1.215678799234675e-05, "loss": 0.7793, "step": 1586 }, { "epoch": 0.6324598983760088, "grad_norm": 0.26507641296686857, "learning_rate": 1.2133516459820565e-05, "loss": 0.7942, "step": 1587 }, { "epoch": 0.6328584238318222, "grad_norm": 0.25208081960776024, "learning_rate": 1.2110257520227208e-05, "loss": 0.8054, "step": 1588 }, { "epoch": 0.6332569492876358, "grad_norm": 0.27064673184332666, "learning_rate": 1.2087011210800368e-05, "loss": 0.8022, "step": 1589 }, { "epoch": 0.6336554747434492, "grad_norm": 0.2586090399717606, "learning_rate": 1.206377756875353e-05, "loss": 0.7962, "step": 1590 }, { "epoch": 0.6340540001992627, "grad_norm": 0.2758486757724476, "learning_rate": 1.2040556631279885e-05, "loss": 0.8141, "step": 1591 }, { "epoch": 0.6344525256550763, "grad_norm": 0.25007000963272646, "learning_rate": 1.2017348435552308e-05, "loss": 0.7876, "step": 1592 }, { "epoch": 0.6348510511108897, "grad_norm": 0.28045825131568236, "learning_rate": 1.1994153018723247e-05, "loss": 0.7782, "step": 1593 }, { "epoch": 0.6352495765667032, "grad_norm": 0.2559398025371776, "learning_rate": 1.1970970417924715e-05, "loss": 0.8016, "step": 1594 }, { "epoch": 0.6356481020225166, "grad_norm": 0.2910472724027498, "learning_rate": 1.1947800670268218e-05, "loss": 0.8057, "step": 1595 }, { "epoch": 0.6360466274783302, "grad_norm": 0.26090925545251104, "learning_rate": 1.1924643812844648e-05, "loss": 0.8074, "step": 1596 }, { "epoch": 0.6364451529341437, "grad_norm": 0.26077758902957177, "learning_rate": 1.1901499882724302e-05, "loss": 0.8125, "step": 1597 }, { "epoch": 0.6368436783899571, "grad_norm": 0.27193415193529746, "learning_rate": 1.1878368916956758e-05, "loss": 0.8205, "step": 1598 }, { "epoch": 0.6372422038457707, "grad_norm": 0.24868413662213312, "learning_rate": 1.1855250952570852e-05, "loss": 0.8046, "step": 1599 }, { "epoch": 0.6376407293015841, "grad_norm": 0.25516205225914074, "learning_rate": 1.1832146026574597e-05, "loss": 0.7823, "step": 1600 }, { "epoch": 0.6380392547573976, "grad_norm": 0.2444397059280007, "learning_rate": 1.1809054175955148e-05, "loss": 0.8074, "step": 1601 }, { "epoch": 0.6384377802132111, "grad_norm": 0.2406561292975351, "learning_rate": 1.1785975437678716e-05, "loss": 0.7995, "step": 1602 }, { "epoch": 0.6388363056690246, "grad_norm": 0.25213243022945864, "learning_rate": 1.1762909848690525e-05, "loss": 0.794, "step": 1603 }, { "epoch": 0.6392348311248381, "grad_norm": 0.250582196145571, "learning_rate": 1.1739857445914757e-05, "loss": 0.8081, "step": 1604 }, { "epoch": 0.6396333565806516, "grad_norm": 0.24639126507572728, "learning_rate": 1.1716818266254462e-05, "loss": 0.8223, "step": 1605 }, { "epoch": 0.6400318820364651, "grad_norm": 0.2341044085916874, "learning_rate": 1.169379234659156e-05, "loss": 0.8122, "step": 1606 }, { "epoch": 0.6404304074922785, "grad_norm": 0.34128549774390465, "learning_rate": 1.1670779723786697e-05, "loss": 0.8032, "step": 1607 }, { "epoch": 0.6408289329480921, "grad_norm": 0.33588417532052334, "learning_rate": 1.1647780434679273e-05, "loss": 0.7921, "step": 1608 }, { "epoch": 0.6412274584039056, "grad_norm": 0.25140600726539664, "learning_rate": 1.1624794516087322e-05, "loss": 0.7937, "step": 1609 }, { "epoch": 0.641625983859719, "grad_norm": 0.23449581497433394, "learning_rate": 1.160182200480748e-05, "loss": 0.7835, "step": 1610 }, { "epoch": 0.6420245093155326, "grad_norm": 0.24952525378723442, "learning_rate": 1.1578862937614935e-05, "loss": 0.7802, "step": 1611 }, { "epoch": 0.642423034771346, "grad_norm": 0.24961214587481048, "learning_rate": 1.1555917351263313e-05, "loss": 0.7823, "step": 1612 }, { "epoch": 0.6428215602271595, "grad_norm": 0.243896806000912, "learning_rate": 1.1532985282484694e-05, "loss": 0.7699, "step": 1613 }, { "epoch": 0.643220085682973, "grad_norm": 0.2704485294167498, "learning_rate": 1.1510066767989522e-05, "loss": 0.7942, "step": 1614 }, { "epoch": 0.6436186111387865, "grad_norm": 0.24876368726137116, "learning_rate": 1.1487161844466513e-05, "loss": 0.8, "step": 1615 }, { "epoch": 0.6440171365946, "grad_norm": 0.24032636669948387, "learning_rate": 1.1464270548582648e-05, "loss": 0.7968, "step": 1616 }, { "epoch": 0.6444156620504135, "grad_norm": 0.26676359276330697, "learning_rate": 1.1441392916983088e-05, "loss": 0.8146, "step": 1617 }, { "epoch": 0.644814187506227, "grad_norm": 0.24000233827708323, "learning_rate": 1.1418528986291126e-05, "loss": 0.813, "step": 1618 }, { "epoch": 0.6452127129620404, "grad_norm": 0.2384982360045188, "learning_rate": 1.1395678793108106e-05, "loss": 0.7664, "step": 1619 }, { "epoch": 0.645611238417854, "grad_norm": 0.25233152858510866, "learning_rate": 1.1372842374013389e-05, "loss": 0.791, "step": 1620 }, { "epoch": 0.6460097638736674, "grad_norm": 0.23424267270162125, "learning_rate": 1.135001976556429e-05, "loss": 0.7872, "step": 1621 }, { "epoch": 0.6464082893294809, "grad_norm": 0.3476922887656111, "learning_rate": 1.1327211004296013e-05, "loss": 0.8117, "step": 1622 }, { "epoch": 0.6468068147852944, "grad_norm": 0.25999768296030096, "learning_rate": 1.1304416126721604e-05, "loss": 0.8016, "step": 1623 }, { "epoch": 0.6472053402411079, "grad_norm": 0.2386900544989497, "learning_rate": 1.1281635169331855e-05, "loss": 0.816, "step": 1624 }, { "epoch": 0.6476038656969214, "grad_norm": 0.23919854850884364, "learning_rate": 1.1258868168595309e-05, "loss": 0.7672, "step": 1625 }, { "epoch": 0.6480023911527348, "grad_norm": 0.24292253081996207, "learning_rate": 1.1236115160958137e-05, "loss": 0.7876, "step": 1626 }, { "epoch": 0.6484009166085484, "grad_norm": 0.2573324955094864, "learning_rate": 1.1213376182844118e-05, "loss": 0.8105, "step": 1627 }, { "epoch": 0.6487994420643619, "grad_norm": 0.2374878968994724, "learning_rate": 1.1190651270654608e-05, "loss": 0.7956, "step": 1628 }, { "epoch": 0.6491979675201753, "grad_norm": 0.22808064108307496, "learning_rate": 1.1167940460768384e-05, "loss": 0.778, "step": 1629 }, { "epoch": 0.6495964929759889, "grad_norm": 0.24975995163182776, "learning_rate": 1.11452437895417e-05, "loss": 0.7927, "step": 1630 }, { "epoch": 0.6499950184318023, "grad_norm": 0.2496302640812307, "learning_rate": 1.1122561293308134e-05, "loss": 0.8093, "step": 1631 }, { "epoch": 0.6503935438876158, "grad_norm": 0.2382600490081852, "learning_rate": 1.1099893008378602e-05, "loss": 0.7989, "step": 1632 }, { "epoch": 0.6507920693434293, "grad_norm": 0.23966141846275127, "learning_rate": 1.1077238971041265e-05, "loss": 0.7737, "step": 1633 }, { "epoch": 0.6511905947992428, "grad_norm": 0.2658481127884238, "learning_rate": 1.1054599217561466e-05, "loss": 0.8161, "step": 1634 }, { "epoch": 0.6515891202550563, "grad_norm": 0.23310988570227098, "learning_rate": 1.10319737841817e-05, "loss": 0.7965, "step": 1635 }, { "epoch": 0.6519876457108698, "grad_norm": 0.2593756062996178, "learning_rate": 1.1009362707121506e-05, "loss": 0.8034, "step": 1636 }, { "epoch": 0.6523861711666833, "grad_norm": 0.25538154058327805, "learning_rate": 1.098676602257748e-05, "loss": 0.8041, "step": 1637 }, { "epoch": 0.6527846966224967, "grad_norm": 0.253312859294886, "learning_rate": 1.0964183766723142e-05, "loss": 0.8418, "step": 1638 }, { "epoch": 0.6531832220783103, "grad_norm": 0.2492955855138997, "learning_rate": 1.0941615975708939e-05, "loss": 0.7821, "step": 1639 }, { "epoch": 0.6535817475341238, "grad_norm": 0.24807305513899183, "learning_rate": 1.0919062685662154e-05, "loss": 0.8218, "step": 1640 }, { "epoch": 0.6539802729899372, "grad_norm": 0.24157259403786543, "learning_rate": 1.0896523932686853e-05, "loss": 0.8093, "step": 1641 }, { "epoch": 0.6543787984457508, "grad_norm": 0.2887027342486142, "learning_rate": 1.0873999752863846e-05, "loss": 0.7708, "step": 1642 }, { "epoch": 0.6547773239015642, "grad_norm": 0.2516367839521763, "learning_rate": 1.085149018225058e-05, "loss": 0.8102, "step": 1643 }, { "epoch": 0.6551758493573777, "grad_norm": 0.24924932650750312, "learning_rate": 1.0828995256881151e-05, "loss": 0.8155, "step": 1644 }, { "epoch": 0.6555743748131911, "grad_norm": 0.2794672477405356, "learning_rate": 1.0806515012766196e-05, "loss": 0.7793, "step": 1645 }, { "epoch": 0.6559729002690047, "grad_norm": 0.2573710085448088, "learning_rate": 1.0784049485892853e-05, "loss": 0.7823, "step": 1646 }, { "epoch": 0.6563714257248182, "grad_norm": 0.22754413318247524, "learning_rate": 1.0761598712224686e-05, "loss": 0.8244, "step": 1647 }, { "epoch": 0.6567699511806316, "grad_norm": 0.2529075220091104, "learning_rate": 1.0739162727701655e-05, "loss": 0.8248, "step": 1648 }, { "epoch": 0.6571684766364452, "grad_norm": 0.23442166283314864, "learning_rate": 1.0716741568240056e-05, "loss": 0.7863, "step": 1649 }, { "epoch": 0.6575670020922586, "grad_norm": 0.2310467368157676, "learning_rate": 1.0694335269732412e-05, "loss": 0.7935, "step": 1650 }, { "epoch": 0.6579655275480721, "grad_norm": 0.2519609841775046, "learning_rate": 1.0671943868047514e-05, "loss": 0.8174, "step": 1651 }, { "epoch": 0.6583640530038857, "grad_norm": 0.23381769850197567, "learning_rate": 1.0649567399030256e-05, "loss": 0.8125, "step": 1652 }, { "epoch": 0.6587625784596991, "grad_norm": 0.2248688496445257, "learning_rate": 1.0627205898501658e-05, "loss": 0.7631, "step": 1653 }, { "epoch": 0.6591611039155126, "grad_norm": 0.24042601112993525, "learning_rate": 1.0604859402258749e-05, "loss": 0.8093, "step": 1654 }, { "epoch": 0.6595596293713261, "grad_norm": 0.23829888619576395, "learning_rate": 1.0582527946074568e-05, "loss": 0.757, "step": 1655 }, { "epoch": 0.6599581548271396, "grad_norm": 0.24849887674234067, "learning_rate": 1.0560211565698065e-05, "loss": 0.7925, "step": 1656 }, { "epoch": 0.660356680282953, "grad_norm": 0.23966740664443098, "learning_rate": 1.053791029685405e-05, "loss": 0.7956, "step": 1657 }, { "epoch": 0.6607552057387666, "grad_norm": 0.2326370782463841, "learning_rate": 1.0515624175243162e-05, "loss": 0.7662, "step": 1658 }, { "epoch": 0.6611537311945801, "grad_norm": 0.31722454033580055, "learning_rate": 1.0493353236541762e-05, "loss": 0.7802, "step": 1659 }, { "epoch": 0.6615522566503935, "grad_norm": 0.25707007749842065, "learning_rate": 1.0471097516401936e-05, "loss": 0.8621, "step": 1660 }, { "epoch": 0.6619507821062071, "grad_norm": 0.24902572963184474, "learning_rate": 1.0448857050451378e-05, "loss": 0.7842, "step": 1661 }, { "epoch": 0.6623493075620205, "grad_norm": 0.24955167998517547, "learning_rate": 1.0426631874293375e-05, "loss": 0.8294, "step": 1662 }, { "epoch": 0.662747833017834, "grad_norm": 0.23384165302801938, "learning_rate": 1.0404422023506769e-05, "loss": 0.79, "step": 1663 }, { "epoch": 0.6631463584736476, "grad_norm": 0.2392972325732434, "learning_rate": 1.038222753364581e-05, "loss": 0.8006, "step": 1664 }, { "epoch": 0.663544883929461, "grad_norm": 0.24206783576164856, "learning_rate": 1.0360048440240211e-05, "loss": 0.8027, "step": 1665 }, { "epoch": 0.6639434093852745, "grad_norm": 0.22839516664163145, "learning_rate": 1.0337884778794993e-05, "loss": 0.7948, "step": 1666 }, { "epoch": 0.6643419348410879, "grad_norm": 0.2402973963775374, "learning_rate": 1.0315736584790507e-05, "loss": 0.8151, "step": 1667 }, { "epoch": 0.6647404602969015, "grad_norm": 0.2343262068157496, "learning_rate": 1.0293603893682327e-05, "loss": 0.7982, "step": 1668 }, { "epoch": 0.6651389857527149, "grad_norm": 0.23763455582566587, "learning_rate": 1.0271486740901215e-05, "loss": 0.8202, "step": 1669 }, { "epoch": 0.6655375112085284, "grad_norm": 0.22857617190624355, "learning_rate": 1.0249385161853064e-05, "loss": 0.8043, "step": 1670 }, { "epoch": 0.665936036664342, "grad_norm": 0.23554787626388524, "learning_rate": 1.0227299191918818e-05, "loss": 0.7754, "step": 1671 }, { "epoch": 0.6663345621201554, "grad_norm": 0.24185097085110915, "learning_rate": 1.0205228866454452e-05, "loss": 0.8149, "step": 1672 }, { "epoch": 0.6667330875759689, "grad_norm": 0.24371976817956506, "learning_rate": 1.018317422079087e-05, "loss": 0.7953, "step": 1673 }, { "epoch": 0.6671316130317824, "grad_norm": 0.22651548749239922, "learning_rate": 1.0161135290233928e-05, "loss": 0.7856, "step": 1674 }, { "epoch": 0.6675301384875959, "grad_norm": 0.23694878438384515, "learning_rate": 1.0139112110064265e-05, "loss": 0.7917, "step": 1675 }, { "epoch": 0.6679286639434094, "grad_norm": 0.23479229889643258, "learning_rate": 1.0117104715537338e-05, "loss": 0.7941, "step": 1676 }, { "epoch": 0.6683271893992229, "grad_norm": 0.24124394146663952, "learning_rate": 1.009511314188334e-05, "loss": 0.8183, "step": 1677 }, { "epoch": 0.6687257148550364, "grad_norm": 0.22678268771998955, "learning_rate": 1.0073137424307109e-05, "loss": 0.785, "step": 1678 }, { "epoch": 0.6691242403108498, "grad_norm": 0.2477335220816568, "learning_rate": 1.0051177597988122e-05, "loss": 0.8033, "step": 1679 }, { "epoch": 0.6695227657666634, "grad_norm": 0.23625778900717528, "learning_rate": 1.0029233698080415e-05, "loss": 0.8033, "step": 1680 }, { "epoch": 0.6699212912224768, "grad_norm": 0.23825465079514177, "learning_rate": 1.0007305759712533e-05, "loss": 0.7735, "step": 1681 }, { "epoch": 0.6703198166782903, "grad_norm": 0.22035417715886807, "learning_rate": 9.985393817987444e-06, "loss": 0.8073, "step": 1682 }, { "epoch": 0.6707183421341039, "grad_norm": 0.23849505686477043, "learning_rate": 9.963497907982532e-06, "loss": 0.8026, "step": 1683 }, { "epoch": 0.6711168675899173, "grad_norm": 0.2337573641381328, "learning_rate": 9.94161806474951e-06, "loss": 0.7889, "step": 1684 }, { "epoch": 0.6715153930457308, "grad_norm": 0.9103894523595338, "learning_rate": 9.919754323314372e-06, "loss": 0.792, "step": 1685 }, { "epoch": 0.6719139185015442, "grad_norm": 0.22893455291621617, "learning_rate": 9.897906718677344e-06, "loss": 0.782, "step": 1686 }, { "epoch": 0.6723124439573578, "grad_norm": 0.2372100351252991, "learning_rate": 9.87607528581279e-06, "loss": 0.8011, "step": 1687 }, { "epoch": 0.6727109694131712, "grad_norm": 0.23111628536958412, "learning_rate": 9.854260059669225e-06, "loss": 0.8025, "step": 1688 }, { "epoch": 0.6731094948689847, "grad_norm": 0.2368927356235449, "learning_rate": 9.832461075169184e-06, "loss": 0.8033, "step": 1689 }, { "epoch": 0.6735080203247983, "grad_norm": 0.22855325082673575, "learning_rate": 9.810678367209227e-06, "loss": 0.7911, "step": 1690 }, { "epoch": 0.6739065457806117, "grad_norm": 0.27522701488615475, "learning_rate": 9.788911970659848e-06, "loss": 0.7916, "step": 1691 }, { "epoch": 0.6743050712364252, "grad_norm": 0.24022760398565116, "learning_rate": 9.767161920365431e-06, "loss": 0.8037, "step": 1692 }, { "epoch": 0.6747035966922387, "grad_norm": 0.22778160452010449, "learning_rate": 9.7454282511442e-06, "loss": 0.8169, "step": 1693 }, { "epoch": 0.6751021221480522, "grad_norm": 0.2307169634206417, "learning_rate": 9.723710997788134e-06, "loss": 0.7951, "step": 1694 }, { "epoch": 0.6755006476038657, "grad_norm": 0.2278130241658777, "learning_rate": 9.702010195062957e-06, "loss": 0.804, "step": 1695 }, { "epoch": 0.6758991730596792, "grad_norm": 0.23860918505971207, "learning_rate": 9.68032587770803e-06, "loss": 0.7775, "step": 1696 }, { "epoch": 0.6762976985154927, "grad_norm": 0.23206722403706048, "learning_rate": 9.65865808043636e-06, "loss": 0.7717, "step": 1697 }, { "epoch": 0.6766962239713061, "grad_norm": 0.2424939487602499, "learning_rate": 9.637006837934491e-06, "loss": 0.8284, "step": 1698 }, { "epoch": 0.6770947494271197, "grad_norm": 0.2422935170368267, "learning_rate": 9.61537218486245e-06, "loss": 0.7982, "step": 1699 }, { "epoch": 0.6774932748829331, "grad_norm": 0.268912315082055, "learning_rate": 9.593754155853736e-06, "loss": 0.8025, "step": 1700 }, { "epoch": 0.6778918003387466, "grad_norm": 0.24641465322988168, "learning_rate": 9.572152785515206e-06, "loss": 0.796, "step": 1701 }, { "epoch": 0.6782903257945602, "grad_norm": 0.23523832181072415, "learning_rate": 9.550568108427067e-06, "loss": 0.7945, "step": 1702 }, { "epoch": 0.6786888512503736, "grad_norm": 0.23985080041043766, "learning_rate": 9.529000159142806e-06, "loss": 0.7967, "step": 1703 }, { "epoch": 0.6790873767061871, "grad_norm": 0.24109034221158648, "learning_rate": 9.507448972189124e-06, "loss": 0.809, "step": 1704 }, { "epoch": 0.6794859021620006, "grad_norm": 0.3279682419994762, "learning_rate": 9.485914582065893e-06, "loss": 0.7976, "step": 1705 }, { "epoch": 0.6798844276178141, "grad_norm": 0.24600000203117356, "learning_rate": 9.464397023246086e-06, "loss": 0.798, "step": 1706 }, { "epoch": 0.6802829530736276, "grad_norm": 0.25198127703741363, "learning_rate": 9.442896330175736e-06, "loss": 0.7666, "step": 1707 }, { "epoch": 0.680681478529441, "grad_norm": 0.24602873394094937, "learning_rate": 9.421412537273888e-06, "loss": 0.8296, "step": 1708 }, { "epoch": 0.6810800039852546, "grad_norm": 0.2462861171716341, "learning_rate": 9.399945678932518e-06, "loss": 0.7671, "step": 1709 }, { "epoch": 0.681478529441068, "grad_norm": 0.2356910774374406, "learning_rate": 9.378495789516511e-06, "loss": 0.8005, "step": 1710 }, { "epoch": 0.6818770548968816, "grad_norm": 0.26676136395934497, "learning_rate": 9.357062903363559e-06, "loss": 0.7966, "step": 1711 }, { "epoch": 0.682275580352695, "grad_norm": 0.22412533500879198, "learning_rate": 9.335647054784163e-06, "loss": 0.7837, "step": 1712 }, { "epoch": 0.6826741058085085, "grad_norm": 0.24899863246739254, "learning_rate": 9.314248278061524e-06, "loss": 0.8113, "step": 1713 }, { "epoch": 0.683072631264322, "grad_norm": 0.2518131395877076, "learning_rate": 9.292866607451534e-06, "loss": 0.7868, "step": 1714 }, { "epoch": 0.6834711567201355, "grad_norm": 0.2384173486107651, "learning_rate": 9.271502077182697e-06, "loss": 0.7748, "step": 1715 }, { "epoch": 0.683869682175949, "grad_norm": 0.5967497241397911, "learning_rate": 9.250154721456075e-06, "loss": 0.7962, "step": 1716 }, { "epoch": 0.6842682076317624, "grad_norm": 0.24269806832216176, "learning_rate": 9.22882457444524e-06, "loss": 0.8026, "step": 1717 }, { "epoch": 0.684666733087576, "grad_norm": 0.23438959649008212, "learning_rate": 9.207511670296204e-06, "loss": 0.795, "step": 1718 }, { "epoch": 0.6850652585433895, "grad_norm": 0.24041761239392234, "learning_rate": 9.186216043127388e-06, "loss": 0.8214, "step": 1719 }, { "epoch": 0.6854637839992029, "grad_norm": 0.2415192222064715, "learning_rate": 9.16493772702955e-06, "loss": 0.7907, "step": 1720 }, { "epoch": 0.6858623094550165, "grad_norm": 0.25457580261405643, "learning_rate": 9.143676756065752e-06, "loss": 0.7912, "step": 1721 }, { "epoch": 0.6862608349108299, "grad_norm": 0.24138741526314378, "learning_rate": 9.122433164271252e-06, "loss": 0.7952, "step": 1722 }, { "epoch": 0.6866593603666434, "grad_norm": 0.23982959026182568, "learning_rate": 9.101206985653523e-06, "loss": 0.8109, "step": 1723 }, { "epoch": 0.6870578858224569, "grad_norm": 0.23128247905861088, "learning_rate": 9.079998254192157e-06, "loss": 0.7996, "step": 1724 }, { "epoch": 0.6874564112782704, "grad_norm": 0.23257176458111745, "learning_rate": 9.058807003838792e-06, "loss": 0.7959, "step": 1725 }, { "epoch": 0.6878549367340839, "grad_norm": 0.2514299885659865, "learning_rate": 9.037633268517105e-06, "loss": 0.8007, "step": 1726 }, { "epoch": 0.6882534621898974, "grad_norm": 0.2296427095516536, "learning_rate": 9.016477082122727e-06, "loss": 0.7671, "step": 1727 }, { "epoch": 0.6886519876457109, "grad_norm": 0.24370730489409603, "learning_rate": 8.995338478523206e-06, "loss": 0.8123, "step": 1728 }, { "epoch": 0.6890505131015243, "grad_norm": 0.23578511930028617, "learning_rate": 8.974217491557916e-06, "loss": 0.7964, "step": 1729 }, { "epoch": 0.6894490385573379, "grad_norm": 0.23684202240770086, "learning_rate": 8.953114155038059e-06, "loss": 0.7808, "step": 1730 }, { "epoch": 0.6898475640131514, "grad_norm": 0.22699784086777558, "learning_rate": 8.932028502746563e-06, "loss": 0.7959, "step": 1731 }, { "epoch": 0.6902460894689648, "grad_norm": 0.24063862708544978, "learning_rate": 8.910960568438058e-06, "loss": 0.789, "step": 1732 }, { "epoch": 0.6906446149247784, "grad_norm": 0.22874206732454588, "learning_rate": 8.889910385838813e-06, "loss": 0.7826, "step": 1733 }, { "epoch": 0.6910431403805918, "grad_norm": 0.2250049276809127, "learning_rate": 8.868877988646656e-06, "loss": 0.7941, "step": 1734 }, { "epoch": 0.6914416658364053, "grad_norm": 0.22799809229676088, "learning_rate": 8.847863410530973e-06, "loss": 0.8039, "step": 1735 }, { "epoch": 0.6918401912922187, "grad_norm": 0.22068818384437014, "learning_rate": 8.826866685132597e-06, "loss": 0.764, "step": 1736 }, { "epoch": 0.6922387167480323, "grad_norm": 0.23302636532036256, "learning_rate": 8.805887846063793e-06, "loss": 0.7814, "step": 1737 }, { "epoch": 0.6926372422038458, "grad_norm": 0.2235081586612528, "learning_rate": 8.784926926908228e-06, "loss": 0.7906, "step": 1738 }, { "epoch": 0.6930357676596592, "grad_norm": 0.23695689079275012, "learning_rate": 8.763983961220818e-06, "loss": 0.7948, "step": 1739 }, { "epoch": 0.6934342931154728, "grad_norm": 0.24343892771165315, "learning_rate": 8.74305898252779e-06, "loss": 0.777, "step": 1740 }, { "epoch": 0.6938328185712862, "grad_norm": 0.2403895498767754, "learning_rate": 8.72215202432654e-06, "loss": 0.8093, "step": 1741 }, { "epoch": 0.6942313440270997, "grad_norm": 0.23104547501067635, "learning_rate": 8.701263120085643e-06, "loss": 0.7747, "step": 1742 }, { "epoch": 0.6946298694829133, "grad_norm": 0.2399257360677753, "learning_rate": 8.680392303244762e-06, "loss": 0.7887, "step": 1743 }, { "epoch": 0.6950283949387267, "grad_norm": 0.2298960897757004, "learning_rate": 8.659539607214609e-06, "loss": 0.805, "step": 1744 }, { "epoch": 0.6954269203945402, "grad_norm": 0.22209674980320604, "learning_rate": 8.638705065376887e-06, "loss": 0.7882, "step": 1745 }, { "epoch": 0.6958254458503537, "grad_norm": 0.22996129591563572, "learning_rate": 8.617888711084225e-06, "loss": 0.7907, "step": 1746 }, { "epoch": 0.6962239713061672, "grad_norm": 0.23756147299275276, "learning_rate": 8.597090577660158e-06, "loss": 0.8248, "step": 1747 }, { "epoch": 0.6966224967619806, "grad_norm": 0.23089712940348142, "learning_rate": 8.576310698399031e-06, "loss": 0.7827, "step": 1748 }, { "epoch": 0.6970210222177942, "grad_norm": 0.22154445039007642, "learning_rate": 8.555549106565981e-06, "loss": 0.7987, "step": 1749 }, { "epoch": 0.6974195476736077, "grad_norm": 0.2331241726825461, "learning_rate": 8.534805835396866e-06, "loss": 0.8262, "step": 1750 }, { "epoch": 0.6978180731294211, "grad_norm": 0.22789526498273438, "learning_rate": 8.514080918098218e-06, "loss": 0.7886, "step": 1751 }, { "epoch": 0.6982165985852347, "grad_norm": 0.21821146925663867, "learning_rate": 8.49337438784719e-06, "loss": 0.801, "step": 1752 }, { "epoch": 0.6986151240410481, "grad_norm": 0.23508205049301503, "learning_rate": 8.472686277791485e-06, "loss": 0.7643, "step": 1753 }, { "epoch": 0.6990136494968616, "grad_norm": 0.22461888065681415, "learning_rate": 8.452016621049333e-06, "loss": 0.7991, "step": 1754 }, { "epoch": 0.699412174952675, "grad_norm": 0.21803368130601183, "learning_rate": 8.431365450709419e-06, "loss": 0.7987, "step": 1755 }, { "epoch": 0.6998107004084886, "grad_norm": 0.23740898039198863, "learning_rate": 8.410732799830845e-06, "loss": 0.7915, "step": 1756 }, { "epoch": 0.7002092258643021, "grad_norm": 0.261735854629893, "learning_rate": 8.39011870144304e-06, "loss": 0.7955, "step": 1757 }, { "epoch": 0.7006077513201155, "grad_norm": 0.2180685253328265, "learning_rate": 8.369523188545756e-06, "loss": 0.8028, "step": 1758 }, { "epoch": 0.7010062767759291, "grad_norm": 0.2301419951414697, "learning_rate": 8.348946294108996e-06, "loss": 0.8103, "step": 1759 }, { "epoch": 0.7014048022317425, "grad_norm": 0.22024932183589127, "learning_rate": 8.328388051072922e-06, "loss": 0.7928, "step": 1760 }, { "epoch": 0.701803327687556, "grad_norm": 0.4020336814790439, "learning_rate": 8.307848492347899e-06, "loss": 0.8011, "step": 1761 }, { "epoch": 0.7022018531433696, "grad_norm": 0.22024662257821778, "learning_rate": 8.287327650814323e-06, "loss": 0.8119, "step": 1762 }, { "epoch": 0.702600378599183, "grad_norm": 0.27996952780116363, "learning_rate": 8.266825559322667e-06, "loss": 0.7987, "step": 1763 }, { "epoch": 0.7029989040549965, "grad_norm": 0.22630541171175222, "learning_rate": 8.246342250693354e-06, "loss": 0.817, "step": 1764 }, { "epoch": 0.70339742951081, "grad_norm": 0.2290021039403852, "learning_rate": 8.225877757716768e-06, "loss": 0.7959, "step": 1765 }, { "epoch": 0.7037959549666235, "grad_norm": 0.2216297139655694, "learning_rate": 8.205432113153158e-06, "loss": 0.7791, "step": 1766 }, { "epoch": 0.7041944804224369, "grad_norm": 0.22804574889964005, "learning_rate": 8.185005349732605e-06, "loss": 0.8041, "step": 1767 }, { "epoch": 0.7045930058782505, "grad_norm": 0.21915038552906846, "learning_rate": 8.16459750015497e-06, "loss": 0.7919, "step": 1768 }, { "epoch": 0.704991531334064, "grad_norm": 0.23641715849802888, "learning_rate": 8.144208597089814e-06, "loss": 0.7684, "step": 1769 }, { "epoch": 0.7053900567898774, "grad_norm": 0.23150971294969083, "learning_rate": 8.123838673176396e-06, "loss": 0.8268, "step": 1770 }, { "epoch": 0.705788582245691, "grad_norm": 0.22487777470325962, "learning_rate": 8.103487761023559e-06, "loss": 0.7952, "step": 1771 }, { "epoch": 0.7061871077015044, "grad_norm": 0.22359225895687845, "learning_rate": 8.08315589320975e-06, "loss": 0.7942, "step": 1772 }, { "epoch": 0.7065856331573179, "grad_norm": 0.22728777425623412, "learning_rate": 8.062843102282916e-06, "loss": 0.7979, "step": 1773 }, { "epoch": 0.7069841586131315, "grad_norm": 0.32242287769373923, "learning_rate": 8.042549420760437e-06, "loss": 0.7758, "step": 1774 }, { "epoch": 0.7073826840689449, "grad_norm": 0.23211148591348726, "learning_rate": 8.022274881129146e-06, "loss": 0.7932, "step": 1775 }, { "epoch": 0.7077812095247584, "grad_norm": 0.23149548150957583, "learning_rate": 8.002019515845194e-06, "loss": 0.781, "step": 1776 }, { "epoch": 0.7081797349805719, "grad_norm": 0.23571260576059858, "learning_rate": 7.981783357334061e-06, "loss": 0.8099, "step": 1777 }, { "epoch": 0.7085782604363854, "grad_norm": 0.23684120441719464, "learning_rate": 7.961566437990475e-06, "loss": 0.7925, "step": 1778 }, { "epoch": 0.7089767858921988, "grad_norm": 0.23808835745048676, "learning_rate": 7.941368790178365e-06, "loss": 0.8035, "step": 1779 }, { "epoch": 0.7093753113480123, "grad_norm": 0.24734022897944857, "learning_rate": 7.921190446230813e-06, "loss": 0.7797, "step": 1780 }, { "epoch": 0.7097738368038259, "grad_norm": 0.2453484186566751, "learning_rate": 7.901031438449982e-06, "loss": 0.819, "step": 1781 }, { "epoch": 0.7101723622596393, "grad_norm": 0.22709522154253955, "learning_rate": 7.880891799107108e-06, "loss": 0.8394, "step": 1782 }, { "epoch": 0.7105708877154528, "grad_norm": 0.24346320063244078, "learning_rate": 7.860771560442384e-06, "loss": 0.8114, "step": 1783 }, { "epoch": 0.7109694131712663, "grad_norm": 0.23923932846526716, "learning_rate": 7.84067075466499e-06, "loss": 0.7866, "step": 1784 }, { "epoch": 0.7113679386270798, "grad_norm": 0.24156935661046483, "learning_rate": 7.820589413952976e-06, "loss": 0.7792, "step": 1785 }, { "epoch": 0.7117664640828933, "grad_norm": 0.24507452424550918, "learning_rate": 7.800527570453215e-06, "loss": 0.7986, "step": 1786 }, { "epoch": 0.7121649895387068, "grad_norm": 0.22251550647565904, "learning_rate": 7.780485256281402e-06, "loss": 0.7733, "step": 1787 }, { "epoch": 0.7125635149945203, "grad_norm": 0.2426455233626753, "learning_rate": 7.760462503521933e-06, "loss": 0.7954, "step": 1788 }, { "epoch": 0.7129620404503337, "grad_norm": 0.23577702373705983, "learning_rate": 7.740459344227918e-06, "loss": 0.7985, "step": 1789 }, { "epoch": 0.7133605659061473, "grad_norm": 0.24472106889910925, "learning_rate": 7.720475810421088e-06, "loss": 0.7924, "step": 1790 }, { "epoch": 0.7137590913619607, "grad_norm": 0.23276012167993276, "learning_rate": 7.700511934091763e-06, "loss": 0.8098, "step": 1791 }, { "epoch": 0.7141576168177742, "grad_norm": 0.2227128937074685, "learning_rate": 7.680567747198797e-06, "loss": 0.8368, "step": 1792 }, { "epoch": 0.7145561422735878, "grad_norm": 0.35446105784971366, "learning_rate": 7.660643281669502e-06, "loss": 0.7913, "step": 1793 }, { "epoch": 0.7149546677294012, "grad_norm": 0.23973566003992375, "learning_rate": 7.640738569399645e-06, "loss": 0.8357, "step": 1794 }, { "epoch": 0.7153531931852147, "grad_norm": 0.2391665089124275, "learning_rate": 7.620853642253363e-06, "loss": 0.8133, "step": 1795 }, { "epoch": 0.7157517186410282, "grad_norm": 0.23522226900870816, "learning_rate": 7.600988532063125e-06, "loss": 0.7926, "step": 1796 }, { "epoch": 0.7161502440968417, "grad_norm": 0.24554684562043907, "learning_rate": 7.58114327062966e-06, "loss": 0.7709, "step": 1797 }, { "epoch": 0.7165487695526552, "grad_norm": 0.22714750497856911, "learning_rate": 7.561317889721937e-06, "loss": 0.7818, "step": 1798 }, { "epoch": 0.7169472950084687, "grad_norm": 0.23827899963595306, "learning_rate": 7.541512421077106e-06, "loss": 0.7728, "step": 1799 }, { "epoch": 0.7173458204642822, "grad_norm": 0.48457590255842975, "learning_rate": 7.521726896400414e-06, "loss": 0.7739, "step": 1800 }, { "epoch": 0.7177443459200956, "grad_norm": 0.23582475280902745, "learning_rate": 7.50196134736521e-06, "loss": 0.8168, "step": 1801 }, { "epoch": 0.7181428713759092, "grad_norm": 0.2380146361056826, "learning_rate": 7.482215805612847e-06, "loss": 0.7779, "step": 1802 }, { "epoch": 0.7185413968317226, "grad_norm": 0.2360276005567584, "learning_rate": 7.462490302752665e-06, "loss": 0.7864, "step": 1803 }, { "epoch": 0.7189399222875361, "grad_norm": 0.22814213757245871, "learning_rate": 7.442784870361903e-06, "loss": 0.8191, "step": 1804 }, { "epoch": 0.7193384477433497, "grad_norm": 0.24107281393643026, "learning_rate": 7.42309953998569e-06, "loss": 0.7838, "step": 1805 }, { "epoch": 0.7197369731991631, "grad_norm": 0.24232433035462758, "learning_rate": 7.4034343431369685e-06, "loss": 0.7977, "step": 1806 }, { "epoch": 0.7201354986549766, "grad_norm": 0.38084230051806445, "learning_rate": 7.38378931129645e-06, "loss": 0.8043, "step": 1807 }, { "epoch": 0.72053402411079, "grad_norm": 0.24397034947179694, "learning_rate": 7.364164475912572e-06, "loss": 0.8068, "step": 1808 }, { "epoch": 0.7209325495666036, "grad_norm": 0.4613176607526505, "learning_rate": 7.344559868401422e-06, "loss": 0.7877, "step": 1809 }, { "epoch": 0.7213310750224171, "grad_norm": 0.23005075594522995, "learning_rate": 7.3249755201467335e-06, "loss": 0.7722, "step": 1810 }, { "epoch": 0.7217296004782305, "grad_norm": 0.2387695579592527, "learning_rate": 7.305411462499776e-06, "loss": 0.8201, "step": 1811 }, { "epoch": 0.7221281259340441, "grad_norm": 0.2344269204447853, "learning_rate": 7.2858677267793635e-06, "loss": 0.7815, "step": 1812 }, { "epoch": 0.7225266513898575, "grad_norm": 0.2280635583340256, "learning_rate": 7.26634434427177e-06, "loss": 0.7814, "step": 1813 }, { "epoch": 0.722925176845671, "grad_norm": 0.2328509307005202, "learning_rate": 7.246841346230684e-06, "loss": 0.7695, "step": 1814 }, { "epoch": 0.7233237023014845, "grad_norm": 0.2237984273349448, "learning_rate": 7.227358763877172e-06, "loss": 0.8082, "step": 1815 }, { "epoch": 0.723722227757298, "grad_norm": 0.24293928069372236, "learning_rate": 7.207896628399598e-06, "loss": 0.8018, "step": 1816 }, { "epoch": 0.7241207532131115, "grad_norm": 0.22708584207065824, "learning_rate": 7.1884549709536115e-06, "loss": 0.788, "step": 1817 }, { "epoch": 0.724519278668925, "grad_norm": 0.23024391469364716, "learning_rate": 7.169033822662077e-06, "loss": 0.7722, "step": 1818 }, { "epoch": 0.7249178041247385, "grad_norm": 0.21908469252061188, "learning_rate": 7.149633214615022e-06, "loss": 0.7757, "step": 1819 }, { "epoch": 0.7253163295805519, "grad_norm": 0.23374912363797343, "learning_rate": 7.130253177869606e-06, "loss": 0.8123, "step": 1820 }, { "epoch": 0.7257148550363655, "grad_norm": 0.23339945263366027, "learning_rate": 7.1108937434500335e-06, "loss": 0.8145, "step": 1821 }, { "epoch": 0.7261133804921789, "grad_norm": 0.22566815004670457, "learning_rate": 7.091554942347551e-06, "loss": 0.7879, "step": 1822 }, { "epoch": 0.7265119059479924, "grad_norm": 0.22495869682272615, "learning_rate": 7.072236805520358e-06, "loss": 0.7979, "step": 1823 }, { "epoch": 0.726910431403806, "grad_norm": 0.2376828902036485, "learning_rate": 7.052939363893583e-06, "loss": 0.8208, "step": 1824 }, { "epoch": 0.7273089568596194, "grad_norm": 0.23450024068687056, "learning_rate": 7.033662648359225e-06, "loss": 0.7824, "step": 1825 }, { "epoch": 0.7277074823154329, "grad_norm": 0.22685374818541473, "learning_rate": 7.014406689776101e-06, "loss": 0.7876, "step": 1826 }, { "epoch": 0.7281060077712463, "grad_norm": 0.23011276016836252, "learning_rate": 6.995171518969808e-06, "loss": 0.8075, "step": 1827 }, { "epoch": 0.7285045332270599, "grad_norm": 0.24933093286417946, "learning_rate": 6.975957166732645e-06, "loss": 0.7662, "step": 1828 }, { "epoch": 0.7289030586828734, "grad_norm": 0.22506531353014372, "learning_rate": 6.956763663823602e-06, "loss": 0.7808, "step": 1829 }, { "epoch": 0.7293015841386868, "grad_norm": 0.23401655584722747, "learning_rate": 6.937591040968288e-06, "loss": 0.8209, "step": 1830 }, { "epoch": 0.7297001095945004, "grad_norm": 0.24774972767529824, "learning_rate": 6.918439328858892e-06, "loss": 0.7712, "step": 1831 }, { "epoch": 0.7300986350503138, "grad_norm": 0.23342909513340782, "learning_rate": 6.89930855815411e-06, "loss": 0.7994, "step": 1832 }, { "epoch": 0.7304971605061273, "grad_norm": 0.22394459360997282, "learning_rate": 6.880198759479133e-06, "loss": 0.8042, "step": 1833 }, { "epoch": 0.7308956859619408, "grad_norm": 0.23360743949550875, "learning_rate": 6.861109963425578e-06, "loss": 0.7916, "step": 1834 }, { "epoch": 0.7312942114177543, "grad_norm": 0.22281906219641856, "learning_rate": 6.8420422005514266e-06, "loss": 0.8137, "step": 1835 }, { "epoch": 0.7316927368735678, "grad_norm": 0.22014312278105563, "learning_rate": 6.822995501380998e-06, "loss": 0.8021, "step": 1836 }, { "epoch": 0.7320912623293813, "grad_norm": 0.2257715944227968, "learning_rate": 6.803969896404896e-06, "loss": 0.784, "step": 1837 }, { "epoch": 0.7324897877851948, "grad_norm": 0.24155855616319677, "learning_rate": 6.784965416079961e-06, "loss": 0.7933, "step": 1838 }, { "epoch": 0.7328883132410082, "grad_norm": 0.22107207590046762, "learning_rate": 6.765982090829189e-06, "loss": 0.784, "step": 1839 }, { "epoch": 0.7332868386968218, "grad_norm": 0.21216318175362134, "learning_rate": 6.74701995104174e-06, "loss": 0.8023, "step": 1840 }, { "epoch": 0.7336853641526353, "grad_norm": 0.2513348774684416, "learning_rate": 6.728079027072847e-06, "loss": 0.8255, "step": 1841 }, { "epoch": 0.7340838896084487, "grad_norm": 0.23421026990778565, "learning_rate": 6.709159349243781e-06, "loss": 0.8255, "step": 1842 }, { "epoch": 0.7344824150642623, "grad_norm": 0.20679965719103174, "learning_rate": 6.690260947841809e-06, "loss": 0.7863, "step": 1843 }, { "epoch": 0.7348809405200757, "grad_norm": 0.24196895097156834, "learning_rate": 6.671383853120117e-06, "loss": 0.8162, "step": 1844 }, { "epoch": 0.7352794659758892, "grad_norm": 0.23539184150189893, "learning_rate": 6.652528095297812e-06, "loss": 0.7788, "step": 1845 }, { "epoch": 0.7356779914317026, "grad_norm": 0.2158639231432844, "learning_rate": 6.633693704559814e-06, "loss": 0.8077, "step": 1846 }, { "epoch": 0.7360765168875162, "grad_norm": 0.23071528135591446, "learning_rate": 6.614880711056853e-06, "loss": 0.7774, "step": 1847 }, { "epoch": 0.7364750423433297, "grad_norm": 0.22552702501791788, "learning_rate": 6.596089144905422e-06, "loss": 0.7794, "step": 1848 }, { "epoch": 0.7368735677991431, "grad_norm": 0.2330734404526342, "learning_rate": 6.577319036187679e-06, "loss": 0.79, "step": 1849 }, { "epoch": 0.7372720932549567, "grad_norm": 0.2265375246131879, "learning_rate": 6.558570414951462e-06, "loss": 0.7922, "step": 1850 }, { "epoch": 0.7376706187107701, "grad_norm": 0.22667338696640402, "learning_rate": 6.539843311210181e-06, "loss": 0.7796, "step": 1851 }, { "epoch": 0.7380691441665836, "grad_norm": 0.23040531636916783, "learning_rate": 6.521137754942828e-06, "loss": 0.8163, "step": 1852 }, { "epoch": 0.7384676696223972, "grad_norm": 0.22397477455791673, "learning_rate": 6.5024537760938886e-06, "loss": 0.8049, "step": 1853 }, { "epoch": 0.7388661950782106, "grad_norm": 0.21837702568211942, "learning_rate": 6.483791404573305e-06, "loss": 0.7899, "step": 1854 }, { "epoch": 0.7392647205340241, "grad_norm": 0.23621768578628966, "learning_rate": 6.465150670256441e-06, "loss": 0.8131, "step": 1855 }, { "epoch": 0.7396632459898376, "grad_norm": 0.22441226758524066, "learning_rate": 6.446531602984003e-06, "loss": 0.8044, "step": 1856 }, { "epoch": 0.7400617714456511, "grad_norm": 0.21742047573106374, "learning_rate": 6.427934232562034e-06, "loss": 0.7779, "step": 1857 }, { "epoch": 0.7404602969014645, "grad_norm": 0.2177698894735104, "learning_rate": 6.409358588761814e-06, "loss": 0.7894, "step": 1858 }, { "epoch": 0.7408588223572781, "grad_norm": 0.22916632915750462, "learning_rate": 6.39080470131989e-06, "loss": 0.7928, "step": 1859 }, { "epoch": 0.7412573478130916, "grad_norm": 0.22082966691884467, "learning_rate": 6.37227259993793e-06, "loss": 0.7915, "step": 1860 }, { "epoch": 0.741655873268905, "grad_norm": 0.2241200766337397, "learning_rate": 6.353762314282757e-06, "loss": 0.7779, "step": 1861 }, { "epoch": 0.7420543987247186, "grad_norm": 0.23702387172593264, "learning_rate": 6.335273873986267e-06, "loss": 0.7829, "step": 1862 }, { "epoch": 0.742452924180532, "grad_norm": 0.2527038905168017, "learning_rate": 6.316807308645367e-06, "loss": 0.7829, "step": 1863 }, { "epoch": 0.7428514496363455, "grad_norm": 0.23475628446887611, "learning_rate": 6.2983626478219695e-06, "loss": 0.7999, "step": 1864 }, { "epoch": 0.7432499750921591, "grad_norm": 0.23416030882805897, "learning_rate": 6.279939921042906e-06, "loss": 0.8085, "step": 1865 }, { "epoch": 0.7436485005479725, "grad_norm": 0.23262020269941716, "learning_rate": 6.261539157799912e-06, "loss": 0.8256, "step": 1866 }, { "epoch": 0.744047026003786, "grad_norm": 0.217504432107485, "learning_rate": 6.243160387549534e-06, "loss": 0.7919, "step": 1867 }, { "epoch": 0.7444455514595995, "grad_norm": 0.22220778420283688, "learning_rate": 6.224803639713138e-06, "loss": 0.7531, "step": 1868 }, { "epoch": 0.744844076915413, "grad_norm": 0.21437200486409036, "learning_rate": 6.206468943676831e-06, "loss": 0.7965, "step": 1869 }, { "epoch": 0.7452426023712264, "grad_norm": 0.23487795253335572, "learning_rate": 6.188156328791397e-06, "loss": 0.8301, "step": 1870 }, { "epoch": 0.74564112782704, "grad_norm": 0.21763886551801245, "learning_rate": 6.169865824372314e-06, "loss": 0.7875, "step": 1871 }, { "epoch": 0.7460396532828535, "grad_norm": 0.22604818846373181, "learning_rate": 6.151597459699621e-06, "loss": 0.8054, "step": 1872 }, { "epoch": 0.7464381787386669, "grad_norm": 0.21771303595209707, "learning_rate": 6.133351264017939e-06, "loss": 0.7735, "step": 1873 }, { "epoch": 0.7468367041944804, "grad_norm": 0.21715354774157822, "learning_rate": 6.115127266536403e-06, "loss": 0.7762, "step": 1874 }, { "epoch": 0.7472352296502939, "grad_norm": 0.2157960601894358, "learning_rate": 6.0969254964285895e-06, "loss": 0.8153, "step": 1875 }, { "epoch": 0.7476337551061074, "grad_norm": 0.22332780451488388, "learning_rate": 6.0787459828325166e-06, "loss": 0.8143, "step": 1876 }, { "epoch": 0.748032280561921, "grad_norm": 0.2309153231971099, "learning_rate": 6.060588754850562e-06, "loss": 0.7899, "step": 1877 }, { "epoch": 0.7484308060177344, "grad_norm": 0.22898127613887323, "learning_rate": 6.042453841549438e-06, "loss": 0.8309, "step": 1878 }, { "epoch": 0.7488293314735479, "grad_norm": 0.21931059736091962, "learning_rate": 6.024341271960112e-06, "loss": 0.7921, "step": 1879 }, { "epoch": 0.7492278569293613, "grad_norm": 0.23434936881308505, "learning_rate": 6.006251075077809e-06, "loss": 0.7799, "step": 1880 }, { "epoch": 0.7496263823851749, "grad_norm": 0.2372270380137871, "learning_rate": 5.988183279861921e-06, "loss": 0.7829, "step": 1881 }, { "epoch": 0.7500249078409883, "grad_norm": 0.22942099098861327, "learning_rate": 5.970137915235992e-06, "loss": 0.7918, "step": 1882 }, { "epoch": 0.7504234332968018, "grad_norm": 0.2355040611383991, "learning_rate": 5.952115010087654e-06, "loss": 0.835, "step": 1883 }, { "epoch": 0.7508219587526154, "grad_norm": 0.2239708740237137, "learning_rate": 5.934114593268572e-06, "loss": 0.7781, "step": 1884 }, { "epoch": 0.7512204842084288, "grad_norm": 0.21984896769317516, "learning_rate": 5.916136693594434e-06, "loss": 0.7862, "step": 1885 }, { "epoch": 0.7516190096642423, "grad_norm": 0.2197233848994438, "learning_rate": 5.898181339844858e-06, "loss": 0.8147, "step": 1886 }, { "epoch": 0.7520175351200558, "grad_norm": 0.21853538967964484, "learning_rate": 5.880248560763384e-06, "loss": 0.7897, "step": 1887 }, { "epoch": 0.7524160605758693, "grad_norm": 0.2251548690545732, "learning_rate": 5.862338385057416e-06, "loss": 0.7984, "step": 1888 }, { "epoch": 0.7528145860316827, "grad_norm": 0.21585033327673825, "learning_rate": 5.844450841398166e-06, "loss": 0.7953, "step": 1889 }, { "epoch": 0.7532131114874963, "grad_norm": 0.22933572814422915, "learning_rate": 5.826585958420625e-06, "loss": 0.8006, "step": 1890 }, { "epoch": 0.7536116369433098, "grad_norm": 0.22747479613099156, "learning_rate": 5.80874376472349e-06, "loss": 0.7598, "step": 1891 }, { "epoch": 0.7540101623991232, "grad_norm": 0.21512314765889684, "learning_rate": 5.790924288869162e-06, "loss": 0.8148, "step": 1892 }, { "epoch": 0.7544086878549368, "grad_norm": 0.33438808323630886, "learning_rate": 5.773127559383638e-06, "loss": 0.7554, "step": 1893 }, { "epoch": 0.7548072133107502, "grad_norm": 0.22483670938682515, "learning_rate": 5.755353604756544e-06, "loss": 0.784, "step": 1894 }, { "epoch": 0.7552057387665637, "grad_norm": 0.21592647946477764, "learning_rate": 5.737602453441032e-06, "loss": 0.7715, "step": 1895 }, { "epoch": 0.7556042642223773, "grad_norm": 0.21691744670655036, "learning_rate": 5.719874133853725e-06, "loss": 0.7909, "step": 1896 }, { "epoch": 0.7560027896781907, "grad_norm": 0.23150710281578893, "learning_rate": 5.702168674374735e-06, "loss": 0.7983, "step": 1897 }, { "epoch": 0.7564013151340042, "grad_norm": 0.22053519786366013, "learning_rate": 5.6844861033475466e-06, "loss": 0.764, "step": 1898 }, { "epoch": 0.7567998405898176, "grad_norm": 0.21199239099110317, "learning_rate": 5.666826449079022e-06, "loss": 0.7872, "step": 1899 }, { "epoch": 0.7571983660456312, "grad_norm": 0.33723343359752794, "learning_rate": 5.649189739839331e-06, "loss": 0.8006, "step": 1900 }, { "epoch": 0.7575968915014446, "grad_norm": 0.22529144997723208, "learning_rate": 5.63157600386192e-06, "loss": 0.8264, "step": 1901 }, { "epoch": 0.7579954169572581, "grad_norm": 0.21629640216592316, "learning_rate": 5.613985269343456e-06, "loss": 0.7854, "step": 1902 }, { "epoch": 0.7583939424130717, "grad_norm": 0.22311405638594484, "learning_rate": 5.596417564443768e-06, "loss": 0.7773, "step": 1903 }, { "epoch": 0.7587924678688851, "grad_norm": 0.21547315103858006, "learning_rate": 5.578872917285838e-06, "loss": 0.7626, "step": 1904 }, { "epoch": 0.7591909933246986, "grad_norm": 0.22382658871923508, "learning_rate": 5.561351355955733e-06, "loss": 0.8059, "step": 1905 }, { "epoch": 0.7595895187805121, "grad_norm": 0.22341672646153143, "learning_rate": 5.543852908502565e-06, "loss": 0.7624, "step": 1906 }, { "epoch": 0.7599880442363256, "grad_norm": 0.21972426758841143, "learning_rate": 5.526377602938429e-06, "loss": 0.8004, "step": 1907 }, { "epoch": 0.7603865696921391, "grad_norm": 0.20999907442340116, "learning_rate": 5.508925467238391e-06, "loss": 0.7865, "step": 1908 }, { "epoch": 0.7607850951479526, "grad_norm": 0.21874631069378098, "learning_rate": 5.491496529340425e-06, "loss": 0.782, "step": 1909 }, { "epoch": 0.7611836206037661, "grad_norm": 0.2171739766459026, "learning_rate": 5.474090817145352e-06, "loss": 0.817, "step": 1910 }, { "epoch": 0.7615821460595795, "grad_norm": 0.23395913286116207, "learning_rate": 5.456708358516833e-06, "loss": 0.7909, "step": 1911 }, { "epoch": 0.7619806715153931, "grad_norm": 0.3909635390360292, "learning_rate": 5.439349181281293e-06, "loss": 0.783, "step": 1912 }, { "epoch": 0.7623791969712065, "grad_norm": 0.21817745960660756, "learning_rate": 5.422013313227896e-06, "loss": 0.7968, "step": 1913 }, { "epoch": 0.76277772242702, "grad_norm": 0.2129422416400334, "learning_rate": 5.404700782108476e-06, "loss": 0.7986, "step": 1914 }, { "epoch": 0.7631762478828336, "grad_norm": 0.24894816442926734, "learning_rate": 5.387411615637521e-06, "loss": 0.7838, "step": 1915 }, { "epoch": 0.763574773338647, "grad_norm": 0.21380121079954537, "learning_rate": 5.370145841492116e-06, "loss": 0.8042, "step": 1916 }, { "epoch": 0.7639732987944605, "grad_norm": 0.2145326012345622, "learning_rate": 5.352903487311893e-06, "loss": 0.7684, "step": 1917 }, { "epoch": 0.764371824250274, "grad_norm": 0.2238740099248399, "learning_rate": 5.3356845806990054e-06, "loss": 0.7789, "step": 1918 }, { "epoch": 0.7647703497060875, "grad_norm": 0.23241336202019805, "learning_rate": 5.318489149218047e-06, "loss": 0.7955, "step": 1919 }, { "epoch": 0.765168875161901, "grad_norm": 0.22274065294729253, "learning_rate": 5.301317220396056e-06, "loss": 0.7971, "step": 1920 }, { "epoch": 0.7655674006177144, "grad_norm": 0.21450551669208287, "learning_rate": 5.284168821722429e-06, "loss": 0.8039, "step": 1921 }, { "epoch": 0.765965926073528, "grad_norm": 0.22005637491103672, "learning_rate": 5.267043980648905e-06, "loss": 0.7785, "step": 1922 }, { "epoch": 0.7663644515293414, "grad_norm": 0.21711685516462279, "learning_rate": 5.249942724589508e-06, "loss": 0.7748, "step": 1923 }, { "epoch": 0.7667629769851549, "grad_norm": 0.21195870427677962, "learning_rate": 5.23286508092051e-06, "loss": 0.7791, "step": 1924 }, { "epoch": 0.7671615024409684, "grad_norm": 0.2215540780948147, "learning_rate": 5.215811076980384e-06, "loss": 0.7867, "step": 1925 }, { "epoch": 0.7675600278967819, "grad_norm": 0.2134811799235333, "learning_rate": 5.1987807400697465e-06, "loss": 0.8204, "step": 1926 }, { "epoch": 0.7679585533525954, "grad_norm": 0.21126480142948123, "learning_rate": 5.1817740974513394e-06, "loss": 0.7744, "step": 1927 }, { "epoch": 0.7683570788084089, "grad_norm": 0.21093921074309108, "learning_rate": 5.164791176349975e-06, "loss": 0.7804, "step": 1928 }, { "epoch": 0.7687556042642224, "grad_norm": 0.22232833723691933, "learning_rate": 5.147832003952482e-06, "loss": 0.8122, "step": 1929 }, { "epoch": 0.7691541297200358, "grad_norm": 0.21135760176592855, "learning_rate": 5.130896607407689e-06, "loss": 0.7837, "step": 1930 }, { "epoch": 0.7695526551758494, "grad_norm": 0.21690410153487147, "learning_rate": 5.113985013826337e-06, "loss": 0.8333, "step": 1931 }, { "epoch": 0.7699511806316629, "grad_norm": 0.22611226851018745, "learning_rate": 5.097097250281089e-06, "loss": 0.8336, "step": 1932 }, { "epoch": 0.7703497060874763, "grad_norm": 0.21422680254932244, "learning_rate": 5.080233343806435e-06, "loss": 0.7925, "step": 1933 }, { "epoch": 0.7707482315432899, "grad_norm": 0.21725411912202952, "learning_rate": 5.063393321398693e-06, "loss": 0.7682, "step": 1934 }, { "epoch": 0.7711467569991033, "grad_norm": 0.20486094819815992, "learning_rate": 5.046577210015941e-06, "loss": 0.7698, "step": 1935 }, { "epoch": 0.7715452824549168, "grad_norm": 0.21116949065534618, "learning_rate": 5.029785036577976e-06, "loss": 0.7839, "step": 1936 }, { "epoch": 0.7719438079107303, "grad_norm": 0.21365660447596332, "learning_rate": 5.013016827966289e-06, "loss": 0.794, "step": 1937 }, { "epoch": 0.7723423333665438, "grad_norm": 0.21986116163132582, "learning_rate": 4.996272611023978e-06, "loss": 0.8004, "step": 1938 }, { "epoch": 0.7727408588223573, "grad_norm": 0.21667082564742637, "learning_rate": 4.979552412555757e-06, "loss": 0.7955, "step": 1939 }, { "epoch": 0.7731393842781707, "grad_norm": 0.2131311718527391, "learning_rate": 4.962856259327888e-06, "loss": 0.8222, "step": 1940 }, { "epoch": 0.7735379097339843, "grad_norm": 0.20312498370931167, "learning_rate": 4.946184178068145e-06, "loss": 0.7777, "step": 1941 }, { "epoch": 0.7739364351897977, "grad_norm": 0.21157244173886958, "learning_rate": 4.929536195465743e-06, "loss": 0.7674, "step": 1942 }, { "epoch": 0.7743349606456112, "grad_norm": 0.21401144119856197, "learning_rate": 4.9129123381713426e-06, "loss": 0.8245, "step": 1943 }, { "epoch": 0.7747334861014247, "grad_norm": 0.21771908112415073, "learning_rate": 4.8963126327969844e-06, "loss": 0.8122, "step": 1944 }, { "epoch": 0.7751320115572382, "grad_norm": 0.21187987139599745, "learning_rate": 4.879737105916021e-06, "loss": 0.8179, "step": 1945 }, { "epoch": 0.7755305370130517, "grad_norm": 0.20845520286257718, "learning_rate": 4.863185784063136e-06, "loss": 0.7991, "step": 1946 }, { "epoch": 0.7759290624688652, "grad_norm": 0.21881307944899714, "learning_rate": 4.8466586937342315e-06, "loss": 0.7715, "step": 1947 }, { "epoch": 0.7763275879246787, "grad_norm": 0.22037508987905377, "learning_rate": 4.830155861386441e-06, "loss": 0.8178, "step": 1948 }, { "epoch": 0.7767261133804921, "grad_norm": 0.2188466732998409, "learning_rate": 4.813677313438045e-06, "loss": 0.7931, "step": 1949 }, { "epoch": 0.7771246388363057, "grad_norm": 0.22029271333920605, "learning_rate": 4.7972230762684695e-06, "loss": 0.7962, "step": 1950 }, { "epoch": 0.7775231642921192, "grad_norm": 0.21586985458048003, "learning_rate": 4.78079317621821e-06, "loss": 0.8035, "step": 1951 }, { "epoch": 0.7779216897479326, "grad_norm": 0.2122373168935699, "learning_rate": 4.7643876395888076e-06, "loss": 0.7668, "step": 1952 }, { "epoch": 0.7783202152037462, "grad_norm": 0.20775917857186701, "learning_rate": 4.748006492642805e-06, "loss": 0.7786, "step": 1953 }, { "epoch": 0.7787187406595596, "grad_norm": 0.21569140886208557, "learning_rate": 4.731649761603685e-06, "loss": 0.8067, "step": 1954 }, { "epoch": 0.7791172661153731, "grad_norm": 0.2131646673455944, "learning_rate": 4.715317472655863e-06, "loss": 0.7971, "step": 1955 }, { "epoch": 0.7795157915711866, "grad_norm": 0.2146175074423186, "learning_rate": 4.699009651944622e-06, "loss": 0.777, "step": 1956 }, { "epoch": 0.7799143170270001, "grad_norm": 0.21312837734855186, "learning_rate": 4.682726325576059e-06, "loss": 0.7932, "step": 1957 }, { "epoch": 0.7803128424828136, "grad_norm": 0.21781795703518547, "learning_rate": 4.666467519617093e-06, "loss": 0.8004, "step": 1958 }, { "epoch": 0.780711367938627, "grad_norm": 0.21181093024914874, "learning_rate": 4.650233260095354e-06, "loss": 0.7586, "step": 1959 }, { "epoch": 0.7811098933944406, "grad_norm": 0.21750201665933414, "learning_rate": 4.634023572999207e-06, "loss": 0.8103, "step": 1960 }, { "epoch": 0.781508418850254, "grad_norm": 0.21261609028271256, "learning_rate": 4.617838484277654e-06, "loss": 0.7794, "step": 1961 }, { "epoch": 0.7819069443060676, "grad_norm": 0.22127702762736784, "learning_rate": 4.601678019840339e-06, "loss": 0.824, "step": 1962 }, { "epoch": 0.7823054697618811, "grad_norm": 0.21167895347901275, "learning_rate": 4.585542205557478e-06, "loss": 0.7872, "step": 1963 }, { "epoch": 0.7827039952176945, "grad_norm": 0.20443014284749786, "learning_rate": 4.569431067259828e-06, "loss": 0.768, "step": 1964 }, { "epoch": 0.783102520673508, "grad_norm": 0.21508398213351645, "learning_rate": 4.553344630738654e-06, "loss": 0.7972, "step": 1965 }, { "epoch": 0.7835010461293215, "grad_norm": 0.21284922880197987, "learning_rate": 4.5372829217456515e-06, "loss": 0.7877, "step": 1966 }, { "epoch": 0.783899571585135, "grad_norm": 0.21149964459483625, "learning_rate": 4.5212459659929596e-06, "loss": 0.8317, "step": 1967 }, { "epoch": 0.7842980970409484, "grad_norm": 0.20959662240837698, "learning_rate": 4.505233789153063e-06, "loss": 0.7761, "step": 1968 }, { "epoch": 0.784696622496762, "grad_norm": 0.21566004770178748, "learning_rate": 4.489246416858814e-06, "loss": 0.7787, "step": 1969 }, { "epoch": 0.7850951479525755, "grad_norm": 0.20948032542954348, "learning_rate": 4.473283874703336e-06, "loss": 0.8001, "step": 1970 }, { "epoch": 0.7854936734083889, "grad_norm": 0.21171612340758303, "learning_rate": 4.457346188239997e-06, "loss": 0.7846, "step": 1971 }, { "epoch": 0.7858921988642025, "grad_norm": 0.211495224788516, "learning_rate": 4.4414333829823944e-06, "loss": 0.8205, "step": 1972 }, { "epoch": 0.7862907243200159, "grad_norm": 0.21182971426196345, "learning_rate": 4.425545484404272e-06, "loss": 0.817, "step": 1973 }, { "epoch": 0.7866892497758294, "grad_norm": 0.20652359587837626, "learning_rate": 4.409682517939527e-06, "loss": 0.7975, "step": 1974 }, { "epoch": 0.787087775231643, "grad_norm": 0.2039383627589195, "learning_rate": 4.393844508982124e-06, "loss": 0.7934, "step": 1975 }, { "epoch": 0.7874863006874564, "grad_norm": 0.20780785483145897, "learning_rate": 4.3780314828860895e-06, "loss": 0.7954, "step": 1976 }, { "epoch": 0.7878848261432699, "grad_norm": 0.2072740025638685, "learning_rate": 4.362243464965452e-06, "loss": 0.7901, "step": 1977 }, { "epoch": 0.7882833515990834, "grad_norm": 0.19867758615892187, "learning_rate": 4.346480480494197e-06, "loss": 0.7606, "step": 1978 }, { "epoch": 0.7886818770548969, "grad_norm": 0.21773075945607415, "learning_rate": 4.330742554706251e-06, "loss": 0.8123, "step": 1979 }, { "epoch": 0.7890804025107103, "grad_norm": 0.20266873734956298, "learning_rate": 4.315029712795404e-06, "loss": 0.799, "step": 1980 }, { "epoch": 0.7894789279665239, "grad_norm": 0.20650482471845288, "learning_rate": 4.299341979915324e-06, "loss": 0.7972, "step": 1981 }, { "epoch": 0.7898774534223374, "grad_norm": 0.20847406865766804, "learning_rate": 4.283679381179449e-06, "loss": 0.8187, "step": 1982 }, { "epoch": 0.7902759788781508, "grad_norm": 0.2077737716719368, "learning_rate": 4.268041941660998e-06, "loss": 0.8032, "step": 1983 }, { "epoch": 0.7906745043339644, "grad_norm": 0.20859031258363198, "learning_rate": 4.252429686392927e-06, "loss": 0.7706, "step": 1984 }, { "epoch": 0.7910730297897778, "grad_norm": 0.20953564600107155, "learning_rate": 4.236842640367844e-06, "loss": 0.7902, "step": 1985 }, { "epoch": 0.7914715552455913, "grad_norm": 0.1998647822957012, "learning_rate": 4.221280828538028e-06, "loss": 0.785, "step": 1986 }, { "epoch": 0.7918700807014049, "grad_norm": 0.2109037742269456, "learning_rate": 4.205744275815351e-06, "loss": 0.788, "step": 1987 }, { "epoch": 0.7922686061572183, "grad_norm": 0.3093393907121497, "learning_rate": 4.19023300707126e-06, "loss": 0.8089, "step": 1988 }, { "epoch": 0.7926671316130318, "grad_norm": 0.21256297107207034, "learning_rate": 4.174747047136707e-06, "loss": 0.7745, "step": 1989 }, { "epoch": 0.7930656570688452, "grad_norm": 0.5160365968905928, "learning_rate": 4.159286420802144e-06, "loss": 0.7948, "step": 1990 }, { "epoch": 0.7934641825246588, "grad_norm": 0.21126289660765277, "learning_rate": 4.1438511528174665e-06, "loss": 0.7918, "step": 1991 }, { "epoch": 0.7938627079804722, "grad_norm": 0.21794744648330014, "learning_rate": 4.1284412678919715e-06, "loss": 0.7843, "step": 1992 }, { "epoch": 0.7942612334362857, "grad_norm": 0.20868906992268485, "learning_rate": 4.11305679069433e-06, "loss": 0.8017, "step": 1993 }, { "epoch": 0.7946597588920993, "grad_norm": 0.21719069879632263, "learning_rate": 4.097697745852522e-06, "loss": 0.7973, "step": 1994 }, { "epoch": 0.7950582843479127, "grad_norm": 0.21142187004817078, "learning_rate": 4.08236415795384e-06, "loss": 0.7814, "step": 1995 }, { "epoch": 0.7954568098037262, "grad_norm": 0.2039420161311614, "learning_rate": 4.067056051544793e-06, "loss": 0.7889, "step": 1996 }, { "epoch": 0.7958553352595397, "grad_norm": 0.24194928974109936, "learning_rate": 4.051773451131127e-06, "loss": 0.7682, "step": 1997 }, { "epoch": 0.7962538607153532, "grad_norm": 0.2012545890604259, "learning_rate": 4.036516381177742e-06, "loss": 0.7782, "step": 1998 }, { "epoch": 0.7966523861711667, "grad_norm": 0.20970642629605174, "learning_rate": 4.02128486610867e-06, "loss": 0.8223, "step": 1999 }, { "epoch": 0.7970509116269802, "grad_norm": 0.20665659488141222, "learning_rate": 4.006078930307043e-06, "loss": 0.7812, "step": 2000 }, { "epoch": 0.7974494370827937, "grad_norm": 0.21749421417588286, "learning_rate": 3.9908985981150275e-06, "loss": 0.7676, "step": 2001 }, { "epoch": 0.7978479625386071, "grad_norm": 0.20888996451808617, "learning_rate": 3.975743893833821e-06, "loss": 0.8185, "step": 2002 }, { "epoch": 0.7982464879944207, "grad_norm": 0.2704077080536192, "learning_rate": 3.960614841723569e-06, "loss": 0.7838, "step": 2003 }, { "epoch": 0.7986450134502341, "grad_norm": 0.2088559508207916, "learning_rate": 3.945511466003391e-06, "loss": 0.8171, "step": 2004 }, { "epoch": 0.7990435389060476, "grad_norm": 0.20661415959125704, "learning_rate": 3.930433790851278e-06, "loss": 0.7754, "step": 2005 }, { "epoch": 0.7994420643618612, "grad_norm": 0.20701920533433565, "learning_rate": 3.915381840404071e-06, "loss": 0.7841, "step": 2006 }, { "epoch": 0.7998405898176746, "grad_norm": 0.21927395552931095, "learning_rate": 3.900355638757452e-06, "loss": 0.8029, "step": 2007 }, { "epoch": 0.8002391152734881, "grad_norm": 0.20280686560023278, "learning_rate": 3.885355209965865e-06, "loss": 0.7794, "step": 2008 }, { "epoch": 0.8006376407293015, "grad_norm": 0.22037706389941072, "learning_rate": 3.870380578042505e-06, "loss": 0.8098, "step": 2009 }, { "epoch": 0.8010361661851151, "grad_norm": 0.22041475186669696, "learning_rate": 3.85543176695927e-06, "loss": 0.803, "step": 2010 }, { "epoch": 0.8014346916409285, "grad_norm": 0.20998177604491353, "learning_rate": 3.840508800646725e-06, "loss": 0.8175, "step": 2011 }, { "epoch": 0.801833217096742, "grad_norm": 0.45165795643816325, "learning_rate": 3.825611702994061e-06, "loss": 0.8009, "step": 2012 }, { "epoch": 0.8022317425525556, "grad_norm": 0.21072158850784894, "learning_rate": 3.810740497849048e-06, "loss": 0.7807, "step": 2013 }, { "epoch": 0.802630268008369, "grad_norm": 0.2069068117921759, "learning_rate": 3.7958952090180145e-06, "loss": 0.8019, "step": 2014 }, { "epoch": 0.8030287934641825, "grad_norm": 0.21068337260203102, "learning_rate": 3.781075860265806e-06, "loss": 0.7816, "step": 2015 }, { "epoch": 0.803427318919996, "grad_norm": 0.21398934601155856, "learning_rate": 3.766282475315741e-06, "loss": 0.7638, "step": 2016 }, { "epoch": 0.8038258443758095, "grad_norm": 0.20441959178687177, "learning_rate": 3.7515150778495566e-06, "loss": 0.806, "step": 2017 }, { "epoch": 0.804224369831623, "grad_norm": 0.21249378504406466, "learning_rate": 3.7367736915074116e-06, "loss": 0.7552, "step": 2018 }, { "epoch": 0.8046228952874365, "grad_norm": 0.20661783667193465, "learning_rate": 3.7220583398878198e-06, "loss": 0.7926, "step": 2019 }, { "epoch": 0.80502142074325, "grad_norm": 0.2077752476136891, "learning_rate": 3.7073690465475996e-06, "loss": 0.8021, "step": 2020 }, { "epoch": 0.8054199461990634, "grad_norm": 0.20570938011934367, "learning_rate": 3.6927058350018774e-06, "loss": 0.7833, "step": 2021 }, { "epoch": 0.805818471654877, "grad_norm": 0.2068378623875997, "learning_rate": 3.678068728724018e-06, "loss": 0.7916, "step": 2022 }, { "epoch": 0.8062169971106904, "grad_norm": 0.2108307060112381, "learning_rate": 3.663457751145598e-06, "loss": 0.8342, "step": 2023 }, { "epoch": 0.8066155225665039, "grad_norm": 0.2078448862912843, "learning_rate": 3.648872925656357e-06, "loss": 0.7984, "step": 2024 }, { "epoch": 0.8070140480223175, "grad_norm": 0.21028048335603441, "learning_rate": 3.6343142756041804e-06, "loss": 0.8018, "step": 2025 }, { "epoch": 0.8074125734781309, "grad_norm": 0.20117720599120376, "learning_rate": 3.61978182429505e-06, "loss": 0.7707, "step": 2026 }, { "epoch": 0.8078110989339444, "grad_norm": 0.20314858168527, "learning_rate": 3.6052755949930028e-06, "loss": 0.8014, "step": 2027 }, { "epoch": 0.8082096243897579, "grad_norm": 0.20807347591232647, "learning_rate": 3.590795610920106e-06, "loss": 0.7783, "step": 2028 }, { "epoch": 0.8086081498455714, "grad_norm": 0.20632811448011976, "learning_rate": 3.5763418952563964e-06, "loss": 0.7887, "step": 2029 }, { "epoch": 0.8090066753013849, "grad_norm": 0.21490462809860467, "learning_rate": 3.561914471139887e-06, "loss": 0.7844, "step": 2030 }, { "epoch": 0.8094052007571984, "grad_norm": 0.20507534096776664, "learning_rate": 3.547513361666468e-06, "loss": 0.7904, "step": 2031 }, { "epoch": 0.8098037262130119, "grad_norm": 0.20644876557134534, "learning_rate": 3.5331385898899286e-06, "loss": 0.7691, "step": 2032 }, { "epoch": 0.8102022516688253, "grad_norm": 0.21240998726372254, "learning_rate": 3.5187901788219005e-06, "loss": 0.8199, "step": 2033 }, { "epoch": 0.8106007771246388, "grad_norm": 0.20137624296072554, "learning_rate": 3.5044681514317923e-06, "loss": 0.7814, "step": 2034 }, { "epoch": 0.8109993025804523, "grad_norm": 0.2073451450199298, "learning_rate": 3.4901725306467983e-06, "loss": 0.7769, "step": 2035 }, { "epoch": 0.8113978280362658, "grad_norm": 0.2134160597885788, "learning_rate": 3.4759033393518227e-06, "loss": 0.7811, "step": 2036 }, { "epoch": 0.8117963534920793, "grad_norm": 0.20469419291818344, "learning_rate": 3.461660600389476e-06, "loss": 0.7819, "step": 2037 }, { "epoch": 0.8121948789478928, "grad_norm": 0.20376860496093793, "learning_rate": 3.447444336560013e-06, "loss": 0.7816, "step": 2038 }, { "epoch": 0.8125934044037063, "grad_norm": 0.41207208863994677, "learning_rate": 3.4332545706213092e-06, "loss": 0.7927, "step": 2039 }, { "epoch": 0.8129919298595197, "grad_norm": 0.21507072465785926, "learning_rate": 3.4190913252888304e-06, "loss": 0.804, "step": 2040 }, { "epoch": 0.8133904553153333, "grad_norm": 0.20319740876888007, "learning_rate": 3.4049546232355677e-06, "loss": 0.7874, "step": 2041 }, { "epoch": 0.8137889807711468, "grad_norm": 0.20241224467511873, "learning_rate": 3.3908444870920377e-06, "loss": 0.7805, "step": 2042 }, { "epoch": 0.8141875062269602, "grad_norm": 0.21466864150429207, "learning_rate": 3.3767609394462177e-06, "loss": 0.78, "step": 2043 }, { "epoch": 0.8145860316827738, "grad_norm": 0.20218659511290218, "learning_rate": 3.3627040028435266e-06, "loss": 0.7801, "step": 2044 }, { "epoch": 0.8149845571385872, "grad_norm": 0.213036870154348, "learning_rate": 3.3486736997867973e-06, "loss": 0.7824, "step": 2045 }, { "epoch": 0.8153830825944007, "grad_norm": 0.19949805665039408, "learning_rate": 3.3346700527361976e-06, "loss": 0.7955, "step": 2046 }, { "epoch": 0.8157816080502142, "grad_norm": 0.20680232683225422, "learning_rate": 3.320693084109252e-06, "loss": 0.7897, "step": 2047 }, { "epoch": 0.8161801335060277, "grad_norm": 0.2000391282113421, "learning_rate": 3.3067428162807524e-06, "loss": 0.8005, "step": 2048 }, { "epoch": 0.8165786589618412, "grad_norm": 0.2156772773776592, "learning_rate": 3.2928192715827635e-06, "loss": 0.8053, "step": 2049 }, { "epoch": 0.8169771844176547, "grad_norm": 0.39867015204161727, "learning_rate": 3.2789224723045688e-06, "loss": 0.7969, "step": 2050 }, { "epoch": 0.8173757098734682, "grad_norm": 0.2046620024871545, "learning_rate": 3.265052440692633e-06, "loss": 0.7926, "step": 2051 }, { "epoch": 0.8177742353292816, "grad_norm": 0.2030808711787401, "learning_rate": 3.2512091989505755e-06, "loss": 0.7774, "step": 2052 }, { "epoch": 0.8181727607850952, "grad_norm": 0.20949507249814342, "learning_rate": 3.2373927692391183e-06, "loss": 0.793, "step": 2053 }, { "epoch": 0.8185712862409087, "grad_norm": 0.20772541980987708, "learning_rate": 3.2236031736760775e-06, "loss": 0.7726, "step": 2054 }, { "epoch": 0.8189698116967221, "grad_norm": 0.21408416045479248, "learning_rate": 3.209840434336291e-06, "loss": 0.7794, "step": 2055 }, { "epoch": 0.8193683371525357, "grad_norm": 0.22494235529547763, "learning_rate": 3.196104573251633e-06, "loss": 0.791, "step": 2056 }, { "epoch": 0.8197668626083491, "grad_norm": 0.20454170412693226, "learning_rate": 3.1823956124109245e-06, "loss": 0.7862, "step": 2057 }, { "epoch": 0.8201653880641626, "grad_norm": 0.20433874449012537, "learning_rate": 3.168713573759934e-06, "loss": 0.7666, "step": 2058 }, { "epoch": 0.820563913519976, "grad_norm": 0.20661160157593184, "learning_rate": 3.1550584792013384e-06, "loss": 0.7433, "step": 2059 }, { "epoch": 0.8209624389757896, "grad_norm": 0.20629809799285342, "learning_rate": 3.1414303505946674e-06, "loss": 0.7976, "step": 2060 }, { "epoch": 0.8213609644316031, "grad_norm": 0.2144450649554419, "learning_rate": 3.1278292097562902e-06, "loss": 0.8333, "step": 2061 }, { "epoch": 0.8217594898874165, "grad_norm": 0.20822166366362016, "learning_rate": 3.1142550784593784e-06, "loss": 0.8266, "step": 2062 }, { "epoch": 0.8221580153432301, "grad_norm": 0.24188329998112856, "learning_rate": 3.100707978433859e-06, "loss": 0.7876, "step": 2063 }, { "epoch": 0.8225565407990435, "grad_norm": 0.2048848180047204, "learning_rate": 3.087187931366382e-06, "loss": 0.7614, "step": 2064 }, { "epoch": 0.822955066254857, "grad_norm": 0.20470377463967024, "learning_rate": 3.0736949589003016e-06, "loss": 0.7781, "step": 2065 }, { "epoch": 0.8233535917106706, "grad_norm": 0.20987934787578208, "learning_rate": 3.0602290826356264e-06, "loss": 0.772, "step": 2066 }, { "epoch": 0.823752117166484, "grad_norm": 0.2113936816052613, "learning_rate": 3.046790324128972e-06, "loss": 0.7872, "step": 2067 }, { "epoch": 0.8241506426222975, "grad_norm": 0.19957043349861603, "learning_rate": 3.0333787048935794e-06, "loss": 0.7887, "step": 2068 }, { "epoch": 0.824549168078111, "grad_norm": 0.3857301817498995, "learning_rate": 3.019994246399205e-06, "loss": 0.7882, "step": 2069 }, { "epoch": 0.8249476935339245, "grad_norm": 0.20789973511441273, "learning_rate": 3.006636970072152e-06, "loss": 0.8076, "step": 2070 }, { "epoch": 0.8253462189897379, "grad_norm": 0.2058835362862163, "learning_rate": 2.993306897295194e-06, "loss": 0.7764, "step": 2071 }, { "epoch": 0.8257447444455515, "grad_norm": 0.20439869423777723, "learning_rate": 2.980004049407561e-06, "loss": 0.7764, "step": 2072 }, { "epoch": 0.826143269901365, "grad_norm": 0.19876479503616204, "learning_rate": 2.9667284477049075e-06, "loss": 0.7826, "step": 2073 }, { "epoch": 0.8265417953571784, "grad_norm": 0.1982699447253256, "learning_rate": 2.9534801134392644e-06, "loss": 0.7757, "step": 2074 }, { "epoch": 0.826940320812992, "grad_norm": 0.20536270507053644, "learning_rate": 2.9402590678190134e-06, "loss": 0.7943, "step": 2075 }, { "epoch": 0.8273388462688054, "grad_norm": 0.20479786214195925, "learning_rate": 2.927065332008847e-06, "loss": 0.796, "step": 2076 }, { "epoch": 0.8277373717246189, "grad_norm": 0.204692054035632, "learning_rate": 2.9138989271297525e-06, "loss": 0.7757, "step": 2077 }, { "epoch": 0.8281358971804323, "grad_norm": 0.2088750085892623, "learning_rate": 2.900759874258938e-06, "loss": 0.8125, "step": 2078 }, { "epoch": 0.8285344226362459, "grad_norm": 0.2044102963337698, "learning_rate": 2.887648194429862e-06, "loss": 0.7641, "step": 2079 }, { "epoch": 0.8289329480920594, "grad_norm": 0.21327563387382853, "learning_rate": 2.874563908632142e-06, "loss": 0.7994, "step": 2080 }, { "epoch": 0.8293314735478728, "grad_norm": 0.2046570896223022, "learning_rate": 2.8615070378115372e-06, "loss": 0.8017, "step": 2081 }, { "epoch": 0.8297299990036864, "grad_norm": 0.19812578410366266, "learning_rate": 2.848477602869937e-06, "loss": 0.784, "step": 2082 }, { "epoch": 0.8301285244594998, "grad_norm": 0.20601688938227922, "learning_rate": 2.8354756246652913e-06, "loss": 0.769, "step": 2083 }, { "epoch": 0.8305270499153133, "grad_norm": 0.2057354048825274, "learning_rate": 2.822501124011612e-06, "loss": 0.7847, "step": 2084 }, { "epoch": 0.8309255753711269, "grad_norm": 0.21168604129063812, "learning_rate": 2.809554121678917e-06, "loss": 0.8032, "step": 2085 }, { "epoch": 0.8313241008269403, "grad_norm": 0.2100939254517527, "learning_rate": 2.7966346383932076e-06, "loss": 0.7874, "step": 2086 }, { "epoch": 0.8317226262827538, "grad_norm": 0.21934203978806813, "learning_rate": 2.7837426948364334e-06, "loss": 0.79, "step": 2087 }, { "epoch": 0.8321211517385673, "grad_norm": 0.19759229839235726, "learning_rate": 2.7708783116464435e-06, "loss": 0.7655, "step": 2088 }, { "epoch": 0.8325196771943808, "grad_norm": 0.2086778699301496, "learning_rate": 2.7580415094169865e-06, "loss": 0.7839, "step": 2089 }, { "epoch": 0.8329182026501942, "grad_norm": 0.21338341723931933, "learning_rate": 2.745232308697636e-06, "loss": 0.829, "step": 2090 }, { "epoch": 0.8333167281060078, "grad_norm": 0.21045174950788936, "learning_rate": 2.732450729993814e-06, "loss": 0.8096, "step": 2091 }, { "epoch": 0.8337152535618213, "grad_norm": 0.2051766400490156, "learning_rate": 2.7196967937666865e-06, "loss": 0.8039, "step": 2092 }, { "epoch": 0.8341137790176347, "grad_norm": 0.19510414251619265, "learning_rate": 2.706970520433192e-06, "loss": 0.7793, "step": 2093 }, { "epoch": 0.8345123044734483, "grad_norm": 0.2023242681129976, "learning_rate": 2.6942719303659837e-06, "loss": 0.781, "step": 2094 }, { "epoch": 0.8349108299292617, "grad_norm": 0.2030427501132859, "learning_rate": 2.681601043893387e-06, "loss": 0.781, "step": 2095 }, { "epoch": 0.8353093553850752, "grad_norm": 0.20888874667008847, "learning_rate": 2.6689578812993857e-06, "loss": 0.7694, "step": 2096 }, { "epoch": 0.8357078808408888, "grad_norm": 0.20077367736979854, "learning_rate": 2.6563424628235845e-06, "loss": 0.7848, "step": 2097 }, { "epoch": 0.8361064062967022, "grad_norm": 0.21005110509053168, "learning_rate": 2.6437548086611765e-06, "loss": 0.7988, "step": 2098 }, { "epoch": 0.8365049317525157, "grad_norm": 0.19800915015594286, "learning_rate": 2.6311949389628956e-06, "loss": 0.8021, "step": 2099 }, { "epoch": 0.8369034572083291, "grad_norm": 0.20692630086537173, "learning_rate": 2.618662873835007e-06, "loss": 0.796, "step": 2100 }, { "epoch": 0.8373019826641427, "grad_norm": 0.20999876285414867, "learning_rate": 2.6061586333392684e-06, "loss": 0.8025, "step": 2101 }, { "epoch": 0.8377005081199561, "grad_norm": 0.20623308075487845, "learning_rate": 2.5936822374928894e-06, "loss": 0.7815, "step": 2102 }, { "epoch": 0.8380990335757696, "grad_norm": 0.205638179543828, "learning_rate": 2.581233706268509e-06, "loss": 0.802, "step": 2103 }, { "epoch": 0.8384975590315832, "grad_norm": 0.19752040584951092, "learning_rate": 2.5688130595941486e-06, "loss": 0.7556, "step": 2104 }, { "epoch": 0.8388960844873966, "grad_norm": 0.20069625765475899, "learning_rate": 2.55642031735321e-06, "loss": 0.7889, "step": 2105 }, { "epoch": 0.8392946099432101, "grad_norm": 0.2018781461121737, "learning_rate": 2.544055499384406e-06, "loss": 0.8142, "step": 2106 }, { "epoch": 0.8396931353990236, "grad_norm": 0.19475379047238844, "learning_rate": 2.5317186254817538e-06, "loss": 0.7663, "step": 2107 }, { "epoch": 0.8400916608548371, "grad_norm": 0.1969342228912807, "learning_rate": 2.519409715394545e-06, "loss": 0.7938, "step": 2108 }, { "epoch": 0.8404901863106506, "grad_norm": 0.19895944903191795, "learning_rate": 2.5071287888272953e-06, "loss": 0.8051, "step": 2109 }, { "epoch": 0.8408887117664641, "grad_norm": 0.20042877149823382, "learning_rate": 2.4948758654397342e-06, "loss": 0.7833, "step": 2110 }, { "epoch": 0.8412872372222776, "grad_norm": 0.19887545472768395, "learning_rate": 2.4826509648467424e-06, "loss": 0.7742, "step": 2111 }, { "epoch": 0.841685762678091, "grad_norm": 0.2011722070087204, "learning_rate": 2.470454106618363e-06, "loss": 0.7857, "step": 2112 }, { "epoch": 0.8420842881339046, "grad_norm": 0.20180297794597085, "learning_rate": 2.458285310279738e-06, "loss": 0.7997, "step": 2113 }, { "epoch": 0.842482813589718, "grad_norm": 0.20055121230743078, "learning_rate": 2.4461445953110862e-06, "loss": 0.8014, "step": 2114 }, { "epoch": 0.8428813390455315, "grad_norm": 0.19868315248272878, "learning_rate": 2.43403198114768e-06, "loss": 0.774, "step": 2115 }, { "epoch": 0.8432798645013451, "grad_norm": 0.19770045553158802, "learning_rate": 2.4219474871797942e-06, "loss": 0.7856, "step": 2116 }, { "epoch": 0.8436783899571585, "grad_norm": 0.20259006469350982, "learning_rate": 2.409891132752702e-06, "loss": 0.8102, "step": 2117 }, { "epoch": 0.844076915412972, "grad_norm": 0.2013541403832189, "learning_rate": 2.3978629371666174e-06, "loss": 0.7853, "step": 2118 }, { "epoch": 0.8444754408687855, "grad_norm": 0.20033442757315134, "learning_rate": 2.3858629196766846e-06, "loss": 0.7877, "step": 2119 }, { "epoch": 0.844873966324599, "grad_norm": 0.21068432536317944, "learning_rate": 2.3738910994929353e-06, "loss": 0.766, "step": 2120 }, { "epoch": 0.8452724917804125, "grad_norm": 0.1980119004076494, "learning_rate": 2.36194749578027e-06, "loss": 0.7731, "step": 2121 }, { "epoch": 0.845671017236226, "grad_norm": 0.19889954520717595, "learning_rate": 2.3500321276584103e-06, "loss": 0.796, "step": 2122 }, { "epoch": 0.8460695426920395, "grad_norm": 0.29416894294679846, "learning_rate": 2.338145014201878e-06, "loss": 0.8096, "step": 2123 }, { "epoch": 0.8464680681478529, "grad_norm": 0.19806318324832906, "learning_rate": 2.326286174439969e-06, "loss": 0.7997, "step": 2124 }, { "epoch": 0.8468665936036665, "grad_norm": 0.19823684897235574, "learning_rate": 2.3144556273567132e-06, "loss": 0.7607, "step": 2125 }, { "epoch": 0.8472651190594799, "grad_norm": 0.18966161568344858, "learning_rate": 2.30265339189085e-06, "loss": 0.7804, "step": 2126 }, { "epoch": 0.8476636445152934, "grad_norm": 0.19521990516259677, "learning_rate": 2.2908794869358044e-06, "loss": 0.7648, "step": 2127 }, { "epoch": 0.848062169971107, "grad_norm": 0.21019481820981523, "learning_rate": 2.27913393133963e-06, "loss": 0.801, "step": 2128 }, { "epoch": 0.8484606954269204, "grad_norm": 0.2044393443918899, "learning_rate": 2.267416743905018e-06, "loss": 0.7998, "step": 2129 }, { "epoch": 0.8488592208827339, "grad_norm": 0.1983161340871745, "learning_rate": 2.255727943389232e-06, "loss": 0.7829, "step": 2130 }, { "epoch": 0.8492577463385473, "grad_norm": 0.3883686062566025, "learning_rate": 2.244067548504101e-06, "loss": 0.7689, "step": 2131 }, { "epoch": 0.8496562717943609, "grad_norm": 0.19823170694060893, "learning_rate": 2.232435577915981e-06, "loss": 0.7841, "step": 2132 }, { "epoch": 0.8500547972501744, "grad_norm": 0.2011348839077823, "learning_rate": 2.2208320502457247e-06, "loss": 0.7743, "step": 2133 }, { "epoch": 0.8504533227059878, "grad_norm": 0.2678986826453042, "learning_rate": 2.209256984068653e-06, "loss": 0.8186, "step": 2134 }, { "epoch": 0.8508518481618014, "grad_norm": 0.38901312200457155, "learning_rate": 2.1977103979145144e-06, "loss": 0.7873, "step": 2135 }, { "epoch": 0.8512503736176148, "grad_norm": 0.19801665808383853, "learning_rate": 2.186192310267481e-06, "loss": 0.7962, "step": 2136 }, { "epoch": 0.8516488990734283, "grad_norm": 0.19959353534388102, "learning_rate": 2.174702739566097e-06, "loss": 0.7875, "step": 2137 }, { "epoch": 0.8520474245292418, "grad_norm": 0.19906997852364527, "learning_rate": 2.1632417042032582e-06, "loss": 0.799, "step": 2138 }, { "epoch": 0.8524459499850553, "grad_norm": 0.19383785374266083, "learning_rate": 2.151809222526171e-06, "loss": 0.8012, "step": 2139 }, { "epoch": 0.8528444754408688, "grad_norm": 0.20008791840830747, "learning_rate": 2.140405312836342e-06, "loss": 0.8034, "step": 2140 }, { "epoch": 0.8532430008966823, "grad_norm": 0.5550294238933178, "learning_rate": 2.1290299933895375e-06, "loss": 0.8056, "step": 2141 }, { "epoch": 0.8536415263524958, "grad_norm": 0.19867486415459287, "learning_rate": 2.1176832823957437e-06, "loss": 0.7777, "step": 2142 }, { "epoch": 0.8540400518083092, "grad_norm": 0.19676333190679646, "learning_rate": 2.1063651980191735e-06, "loss": 0.7915, "step": 2143 }, { "epoch": 0.8544385772641228, "grad_norm": 0.1989409125958559, "learning_rate": 2.095075758378191e-06, "loss": 0.8095, "step": 2144 }, { "epoch": 0.8548371027199362, "grad_norm": 0.21328576722717954, "learning_rate": 2.083814981545316e-06, "loss": 0.8003, "step": 2145 }, { "epoch": 0.8552356281757497, "grad_norm": 0.20295493914625967, "learning_rate": 2.0725828855471743e-06, "loss": 0.8048, "step": 2146 }, { "epoch": 0.8556341536315633, "grad_norm": 0.2074806852443234, "learning_rate": 2.06137948836449e-06, "loss": 0.8056, "step": 2147 }, { "epoch": 0.8560326790873767, "grad_norm": 0.1970460127714032, "learning_rate": 2.0502048079320412e-06, "loss": 0.7719, "step": 2148 }, { "epoch": 0.8564312045431902, "grad_norm": 0.20135572980918695, "learning_rate": 2.03905886213863e-06, "loss": 0.8124, "step": 2149 }, { "epoch": 0.8568297299990036, "grad_norm": 0.19706602719348762, "learning_rate": 2.0279416688270714e-06, "loss": 0.8042, "step": 2150 }, { "epoch": 0.8572282554548172, "grad_norm": 0.19351017765851636, "learning_rate": 2.0168532457941347e-06, "loss": 0.7817, "step": 2151 }, { "epoch": 0.8576267809106307, "grad_norm": 0.19662641436265876, "learning_rate": 2.0057936107905496e-06, "loss": 0.7872, "step": 2152 }, { "epoch": 0.8580253063664441, "grad_norm": 0.19472713717233617, "learning_rate": 1.994762781520947e-06, "loss": 0.7959, "step": 2153 }, { "epoch": 0.8584238318222577, "grad_norm": 0.4466872234199686, "learning_rate": 1.9837607756438506e-06, "loss": 0.7957, "step": 2154 }, { "epoch": 0.8588223572780711, "grad_norm": 0.19598069824689382, "learning_rate": 1.972787610771656e-06, "loss": 0.7728, "step": 2155 }, { "epoch": 0.8592208827338846, "grad_norm": 0.20101685010301282, "learning_rate": 1.9618433044705653e-06, "loss": 0.7943, "step": 2156 }, { "epoch": 0.8596194081896981, "grad_norm": 0.298341423595395, "learning_rate": 1.9509278742605998e-06, "loss": 0.8152, "step": 2157 }, { "epoch": 0.8600179336455116, "grad_norm": 0.19641318468760852, "learning_rate": 1.9400413376155414e-06, "loss": 0.7718, "step": 2158 }, { "epoch": 0.8604164591013251, "grad_norm": 0.20359959382775875, "learning_rate": 1.929183711962932e-06, "loss": 0.8166, "step": 2159 }, { "epoch": 0.8608149845571386, "grad_norm": 0.29285934932172486, "learning_rate": 1.918355014684026e-06, "loss": 0.8116, "step": 2160 }, { "epoch": 0.8612135100129521, "grad_norm": 0.20081004118069398, "learning_rate": 1.9075552631137673e-06, "loss": 0.828, "step": 2161 }, { "epoch": 0.8616120354687655, "grad_norm": 0.19491684359283115, "learning_rate": 1.8967844745407649e-06, "loss": 0.8162, "step": 2162 }, { "epoch": 0.8620105609245791, "grad_norm": 0.19931801177242742, "learning_rate": 1.8860426662072573e-06, "loss": 0.7646, "step": 2163 }, { "epoch": 0.8624090863803926, "grad_norm": 0.19469429796070387, "learning_rate": 1.8753298553091004e-06, "loss": 0.7662, "step": 2164 }, { "epoch": 0.862807611836206, "grad_norm": 0.19523553415875863, "learning_rate": 1.8646460589957138e-06, "loss": 0.7675, "step": 2165 }, { "epoch": 0.8632061372920196, "grad_norm": 0.19836255092500826, "learning_rate": 1.8539912943700921e-06, "loss": 0.8162, "step": 2166 }, { "epoch": 0.863604662747833, "grad_norm": 0.33046612241829804, "learning_rate": 1.8433655784887338e-06, "loss": 0.786, "step": 2167 }, { "epoch": 0.8640031882036465, "grad_norm": 0.20287140254104755, "learning_rate": 1.832768928361648e-06, "loss": 0.8033, "step": 2168 }, { "epoch": 0.86440171365946, "grad_norm": 0.19837142562234192, "learning_rate": 1.8222013609523138e-06, "loss": 0.7856, "step": 2169 }, { "epoch": 0.8648002391152735, "grad_norm": 0.21103666545418504, "learning_rate": 1.8116628931776437e-06, "loss": 0.8434, "step": 2170 }, { "epoch": 0.865198764571087, "grad_norm": 0.19867703712237042, "learning_rate": 1.801153541907974e-06, "loss": 0.7698, "step": 2171 }, { "epoch": 0.8655972900269004, "grad_norm": 0.19825876352724692, "learning_rate": 1.7906733239670338e-06, "loss": 0.772, "step": 2172 }, { "epoch": 0.865995815482714, "grad_norm": 0.20878459364682986, "learning_rate": 1.7802222561319116e-06, "loss": 0.7581, "step": 2173 }, { "epoch": 0.8663943409385274, "grad_norm": 0.2958038314902087, "learning_rate": 1.7698003551330222e-06, "loss": 0.7944, "step": 2174 }, { "epoch": 0.8667928663943409, "grad_norm": 0.20169391290837302, "learning_rate": 1.7594076376541025e-06, "loss": 0.8066, "step": 2175 }, { "epoch": 0.8671913918501545, "grad_norm": 0.234034044100227, "learning_rate": 1.749044120332164e-06, "loss": 0.7721, "step": 2176 }, { "epoch": 0.8675899173059679, "grad_norm": 0.2034910419905341, "learning_rate": 1.7387098197574782e-06, "loss": 0.8084, "step": 2177 }, { "epoch": 0.8679884427617814, "grad_norm": 0.2073685879363281, "learning_rate": 1.7284047524735426e-06, "loss": 0.7925, "step": 2178 }, { "epoch": 0.8683869682175949, "grad_norm": 0.20037230019907548, "learning_rate": 1.7181289349770547e-06, "loss": 0.7811, "step": 2179 }, { "epoch": 0.8687854936734084, "grad_norm": 0.21712284699454534, "learning_rate": 1.707882383717896e-06, "loss": 0.7678, "step": 2180 }, { "epoch": 0.8691840191292218, "grad_norm": 0.20117180870370702, "learning_rate": 1.697665115099083e-06, "loss": 0.7942, "step": 2181 }, { "epoch": 0.8695825445850354, "grad_norm": 0.194101573652863, "learning_rate": 1.6874771454767723e-06, "loss": 0.7824, "step": 2182 }, { "epoch": 0.8699810700408489, "grad_norm": 0.19921324707773355, "learning_rate": 1.677318491160207e-06, "loss": 0.7928, "step": 2183 }, { "epoch": 0.8703795954966623, "grad_norm": 0.3229505296718228, "learning_rate": 1.6671891684117048e-06, "loss": 0.827, "step": 2184 }, { "epoch": 0.8707781209524759, "grad_norm": 0.19497337244902666, "learning_rate": 1.6570891934466304e-06, "loss": 0.8059, "step": 2185 }, { "epoch": 0.8711766464082893, "grad_norm": 0.19561470121792823, "learning_rate": 1.6470185824333617e-06, "loss": 0.7976, "step": 2186 }, { "epoch": 0.8715751718641028, "grad_norm": 0.1969078670974646, "learning_rate": 1.6369773514932786e-06, "loss": 0.7653, "step": 2187 }, { "epoch": 0.8719736973199164, "grad_norm": 0.19792267780479758, "learning_rate": 1.6269655167007136e-06, "loss": 0.7824, "step": 2188 }, { "epoch": 0.8723722227757298, "grad_norm": 0.19510256307880908, "learning_rate": 1.6169830940829578e-06, "loss": 0.8068, "step": 2189 }, { "epoch": 0.8727707482315433, "grad_norm": 0.1960870054521117, "learning_rate": 1.6070300996202126e-06, "loss": 0.7989, "step": 2190 }, { "epoch": 0.8731692736873568, "grad_norm": 0.19085815051372912, "learning_rate": 1.5971065492455617e-06, "loss": 0.7636, "step": 2191 }, { "epoch": 0.8735677991431703, "grad_norm": 0.19882296691960544, "learning_rate": 1.5872124588449667e-06, "loss": 0.7659, "step": 2192 }, { "epoch": 0.8739663245989837, "grad_norm": 0.20028837148412157, "learning_rate": 1.5773478442572154e-06, "loss": 0.7934, "step": 2193 }, { "epoch": 0.8743648500547972, "grad_norm": 0.19461902920242444, "learning_rate": 1.5675127212739183e-06, "loss": 0.7905, "step": 2194 }, { "epoch": 0.8747633755106108, "grad_norm": 0.2016751952111212, "learning_rate": 1.5577071056394743e-06, "loss": 0.7862, "step": 2195 }, { "epoch": 0.8751619009664242, "grad_norm": 0.19602147097639658, "learning_rate": 1.5479310130510428e-06, "loss": 0.7845, "step": 2196 }, { "epoch": 0.8755604264222377, "grad_norm": 0.19583338180249446, "learning_rate": 1.5381844591585294e-06, "loss": 0.7957, "step": 2197 }, { "epoch": 0.8759589518780512, "grad_norm": 0.19403020064241092, "learning_rate": 1.5284674595645376e-06, "loss": 0.7963, "step": 2198 }, { "epoch": 0.8763574773338647, "grad_norm": 0.19782089212017984, "learning_rate": 1.518780029824376e-06, "loss": 0.7782, "step": 2199 }, { "epoch": 0.8767560027896782, "grad_norm": 0.19942984981212644, "learning_rate": 1.5091221854460037e-06, "loss": 0.7975, "step": 2200 }, { "epoch": 0.8771545282454917, "grad_norm": 0.19196702353727593, "learning_rate": 1.4994939418900334e-06, "loss": 0.7829, "step": 2201 }, { "epoch": 0.8775530537013052, "grad_norm": 0.19379377172825363, "learning_rate": 1.4898953145696738e-06, "loss": 0.7982, "step": 2202 }, { "epoch": 0.8779515791571186, "grad_norm": 0.19506234613903994, "learning_rate": 1.4803263188507377e-06, "loss": 0.7954, "step": 2203 }, { "epoch": 0.8783501046129322, "grad_norm": 0.1978506554262955, "learning_rate": 1.4707869700515965e-06, "loss": 0.784, "step": 2204 }, { "epoch": 0.8787486300687456, "grad_norm": 0.1980098585833247, "learning_rate": 1.4612772834431566e-06, "loss": 0.7569, "step": 2205 }, { "epoch": 0.8791471555245591, "grad_norm": 0.19286242098132406, "learning_rate": 1.4517972742488518e-06, "loss": 0.7872, "step": 2206 }, { "epoch": 0.8795456809803727, "grad_norm": 0.19098749250411995, "learning_rate": 1.4423469576446002e-06, "loss": 0.7815, "step": 2207 }, { "epoch": 0.8799442064361861, "grad_norm": 0.20211925019195784, "learning_rate": 1.4329263487587896e-06, "loss": 0.8205, "step": 2208 }, { "epoch": 0.8803427318919996, "grad_norm": 0.19532927186278154, "learning_rate": 1.4235354626722431e-06, "loss": 0.8121, "step": 2209 }, { "epoch": 0.8807412573478131, "grad_norm": 0.1977750810931428, "learning_rate": 1.4141743144182153e-06, "loss": 0.7813, "step": 2210 }, { "epoch": 0.8811397828036266, "grad_norm": 0.19358648033690376, "learning_rate": 1.4048429189823432e-06, "loss": 0.7455, "step": 2211 }, { "epoch": 0.88153830825944, "grad_norm": 0.19846194328922676, "learning_rate": 1.3955412913026468e-06, "loss": 0.7662, "step": 2212 }, { "epoch": 0.8819368337152536, "grad_norm": 0.19353205579063595, "learning_rate": 1.3862694462694836e-06, "loss": 0.7835, "step": 2213 }, { "epoch": 0.8823353591710671, "grad_norm": 0.1961760649090444, "learning_rate": 1.3770273987255322e-06, "loss": 0.7869, "step": 2214 }, { "epoch": 0.8827338846268805, "grad_norm": 0.198917812531222, "learning_rate": 1.36781516346578e-06, "loss": 0.7903, "step": 2215 }, { "epoch": 0.883132410082694, "grad_norm": 0.31377432050732995, "learning_rate": 1.3586327552374834e-06, "loss": 0.7966, "step": 2216 }, { "epoch": 0.8835309355385075, "grad_norm": 0.198947487765649, "learning_rate": 1.349480188740151e-06, "loss": 0.7845, "step": 2217 }, { "epoch": 0.883929460994321, "grad_norm": 0.19609086834502595, "learning_rate": 1.3403574786255203e-06, "loss": 0.8267, "step": 2218 }, { "epoch": 0.8843279864501346, "grad_norm": 0.19456541239424982, "learning_rate": 1.3312646394975336e-06, "loss": 0.7844, "step": 2219 }, { "epoch": 0.884726511905948, "grad_norm": 0.18969287146965966, "learning_rate": 1.322201685912321e-06, "loss": 0.7561, "step": 2220 }, { "epoch": 0.8851250373617615, "grad_norm": 0.19860314043543428, "learning_rate": 1.3131686323781567e-06, "loss": 0.7827, "step": 2221 }, { "epoch": 0.8855235628175749, "grad_norm": 0.19669097960151344, "learning_rate": 1.3041654933554627e-06, "loss": 0.8035, "step": 2222 }, { "epoch": 0.8859220882733885, "grad_norm": 0.20094073004540627, "learning_rate": 1.2951922832567676e-06, "loss": 0.7944, "step": 2223 }, { "epoch": 0.8863206137292019, "grad_norm": 0.20272095445679028, "learning_rate": 1.28624901644669e-06, "loss": 0.8167, "step": 2224 }, { "epoch": 0.8867191391850154, "grad_norm": 0.1953963230612544, "learning_rate": 1.2773357072419156e-06, "loss": 0.7721, "step": 2225 }, { "epoch": 0.887117664640829, "grad_norm": 0.19553209878909217, "learning_rate": 1.2684523699111683e-06, "loss": 0.7898, "step": 2226 }, { "epoch": 0.8875161900966424, "grad_norm": 0.19739783007158812, "learning_rate": 1.259599018675197e-06, "loss": 0.7751, "step": 2227 }, { "epoch": 0.8879147155524559, "grad_norm": 0.2004207680549029, "learning_rate": 1.2507756677067407e-06, "loss": 0.7937, "step": 2228 }, { "epoch": 0.8883132410082694, "grad_norm": 0.20274609576106925, "learning_rate": 1.241982331130518e-06, "loss": 0.7834, "step": 2229 }, { "epoch": 0.8887117664640829, "grad_norm": 0.191410521331001, "learning_rate": 1.233219023023211e-06, "loss": 0.7964, "step": 2230 }, { "epoch": 0.8891102919198964, "grad_norm": 0.19149409444639265, "learning_rate": 1.2244857574134073e-06, "loss": 0.8145, "step": 2231 }, { "epoch": 0.8895088173757099, "grad_norm": 0.18942106902230516, "learning_rate": 1.215782548281621e-06, "loss": 0.7978, "step": 2232 }, { "epoch": 0.8899073428315234, "grad_norm": 0.1936628231256215, "learning_rate": 1.2071094095602388e-06, "loss": 0.7688, "step": 2233 }, { "epoch": 0.8903058682873368, "grad_norm": 0.19500787909123946, "learning_rate": 1.198466355133514e-06, "loss": 0.7985, "step": 2234 }, { "epoch": 0.8907043937431504, "grad_norm": 0.19336207767259123, "learning_rate": 1.1898533988375438e-06, "loss": 0.7776, "step": 2235 }, { "epoch": 0.8911029191989638, "grad_norm": 0.19306715682597733, "learning_rate": 1.1812705544602387e-06, "loss": 0.7781, "step": 2236 }, { "epoch": 0.8915014446547773, "grad_norm": 0.1941966927070721, "learning_rate": 1.1727178357413082e-06, "loss": 0.7966, "step": 2237 }, { "epoch": 0.8918999701105909, "grad_norm": 0.19470860439438434, "learning_rate": 1.1641952563722292e-06, "loss": 0.7875, "step": 2238 }, { "epoch": 0.8922984955664043, "grad_norm": 0.19439172234266613, "learning_rate": 1.155702829996239e-06, "loss": 0.7949, "step": 2239 }, { "epoch": 0.8926970210222178, "grad_norm": 0.19183529464923693, "learning_rate": 1.1472405702082966e-06, "loss": 0.8169, "step": 2240 }, { "epoch": 0.8930955464780312, "grad_norm": 0.187219800602026, "learning_rate": 1.1388084905550767e-06, "loss": 0.7913, "step": 2241 }, { "epoch": 0.8934940719338448, "grad_norm": 0.19725379366605064, "learning_rate": 1.1304066045349371e-06, "loss": 0.7759, "step": 2242 }, { "epoch": 0.8938925973896583, "grad_norm": 0.1992594593840214, "learning_rate": 1.1220349255978991e-06, "loss": 0.8375, "step": 2243 }, { "epoch": 0.8942911228454717, "grad_norm": 0.192342482643918, "learning_rate": 1.1136934671456356e-06, "loss": 0.7732, "step": 2244 }, { "epoch": 0.8946896483012853, "grad_norm": 0.19446382141607246, "learning_rate": 1.1053822425314253e-06, "loss": 0.7787, "step": 2245 }, { "epoch": 0.8950881737570987, "grad_norm": 0.21096981672144022, "learning_rate": 1.0971012650601653e-06, "loss": 0.7856, "step": 2246 }, { "epoch": 0.8954866992129122, "grad_norm": 0.19037453116007338, "learning_rate": 1.0888505479883226e-06, "loss": 0.8141, "step": 2247 }, { "epoch": 0.8958852246687257, "grad_norm": 0.1936745233440335, "learning_rate": 1.0806301045239253e-06, "loss": 0.776, "step": 2248 }, { "epoch": 0.8962837501245392, "grad_norm": 0.19445932650167486, "learning_rate": 1.0724399478265312e-06, "loss": 0.7968, "step": 2249 }, { "epoch": 0.8966822755803527, "grad_norm": 0.1942260109414643, "learning_rate": 1.064280091007226e-06, "loss": 0.7982, "step": 2250 }, { "epoch": 0.8970808010361662, "grad_norm": 0.19599907378500261, "learning_rate": 1.056150547128585e-06, "loss": 0.7812, "step": 2251 }, { "epoch": 0.8974793264919797, "grad_norm": 0.18888785669949568, "learning_rate": 1.048051329204649e-06, "loss": 0.7749, "step": 2252 }, { "epoch": 0.8978778519477931, "grad_norm": 0.19413389947923068, "learning_rate": 1.0399824502009292e-06, "loss": 0.817, "step": 2253 }, { "epoch": 0.8982763774036067, "grad_norm": 0.19041901167362632, "learning_rate": 1.0319439230343552e-06, "loss": 0.7829, "step": 2254 }, { "epoch": 0.8986749028594202, "grad_norm": 0.190265615798965, "learning_rate": 1.023935760573278e-06, "loss": 0.7854, "step": 2255 }, { "epoch": 0.8990734283152336, "grad_norm": 0.1917924700076846, "learning_rate": 1.0159579756374272e-06, "loss": 0.8021, "step": 2256 }, { "epoch": 0.8994719537710472, "grad_norm": 0.19462841904809697, "learning_rate": 1.0080105809979134e-06, "loss": 0.7983, "step": 2257 }, { "epoch": 0.8998704792268606, "grad_norm": 0.19572994397974086, "learning_rate": 1.0000935893771957e-06, "loss": 0.7807, "step": 2258 }, { "epoch": 0.9002690046826741, "grad_norm": 0.19368930137185603, "learning_rate": 9.922070134490625e-07, "loss": 0.8069, "step": 2259 }, { "epoch": 0.9006675301384875, "grad_norm": 0.18858216628151148, "learning_rate": 9.843508658386147e-07, "loss": 0.778, "step": 2260 }, { "epoch": 0.9010660555943011, "grad_norm": 0.1902121814138829, "learning_rate": 9.765251591222302e-07, "loss": 0.7545, "step": 2261 }, { "epoch": 0.9014645810501146, "grad_norm": 0.19207716877501332, "learning_rate": 9.687299058275723e-07, "loss": 0.8013, "step": 2262 }, { "epoch": 0.901863106505928, "grad_norm": 0.19334913879349405, "learning_rate": 9.609651184335389e-07, "loss": 0.7946, "step": 2263 }, { "epoch": 0.9022616319617416, "grad_norm": 0.19358676874591074, "learning_rate": 9.532308093702691e-07, "loss": 0.7772, "step": 2264 }, { "epoch": 0.902660157417555, "grad_norm": 0.19148428383209684, "learning_rate": 9.455269910191101e-07, "loss": 0.7696, "step": 2265 }, { "epoch": 0.9030586828733685, "grad_norm": 0.19540552907978265, "learning_rate": 9.378536757125878e-07, "loss": 0.8139, "step": 2266 }, { "epoch": 0.903457208329182, "grad_norm": 0.19107073335621758, "learning_rate": 9.302108757344119e-07, "loss": 0.7858, "step": 2267 }, { "epoch": 0.9038557337849955, "grad_norm": 0.1918963920445226, "learning_rate": 9.225986033194268e-07, "loss": 0.7788, "step": 2268 }, { "epoch": 0.904254259240809, "grad_norm": 0.19310448866238283, "learning_rate": 9.150168706536178e-07, "loss": 0.7866, "step": 2269 }, { "epoch": 0.9046527846966225, "grad_norm": 0.19687413534571704, "learning_rate": 9.07465689874083e-07, "loss": 0.7893, "step": 2270 }, { "epoch": 0.905051310152436, "grad_norm": 0.1914042946404483, "learning_rate": 8.99945073069004e-07, "loss": 0.7748, "step": 2271 }, { "epoch": 0.9054498356082494, "grad_norm": 0.2024204777517844, "learning_rate": 8.924550322776415e-07, "loss": 0.8568, "step": 2272 }, { "epoch": 0.905848361064063, "grad_norm": 0.19403659491993944, "learning_rate": 8.849955794903042e-07, "loss": 0.8056, "step": 2273 }, { "epoch": 0.9062468865198765, "grad_norm": 0.19411389381810215, "learning_rate": 8.775667266483378e-07, "loss": 0.7911, "step": 2274 }, { "epoch": 0.9066454119756899, "grad_norm": 0.1924715710067694, "learning_rate": 8.70168485644094e-07, "loss": 0.7965, "step": 2275 }, { "epoch": 0.9070439374315035, "grad_norm": 0.20038568330574344, "learning_rate": 8.628008683209388e-07, "loss": 0.7843, "step": 2276 }, { "epoch": 0.9074424628873169, "grad_norm": 0.20132103527197703, "learning_rate": 8.554638864731957e-07, "loss": 0.7999, "step": 2277 }, { "epoch": 0.9078409883431304, "grad_norm": 0.19240880279129838, "learning_rate": 8.481575518461538e-07, "loss": 0.7665, "step": 2278 }, { "epoch": 0.9082395137989439, "grad_norm": 0.19434784566980481, "learning_rate": 8.408818761360437e-07, "loss": 0.8056, "step": 2279 }, { "epoch": 0.9086380392547574, "grad_norm": 0.1978390018533812, "learning_rate": 8.336368709900089e-07, "loss": 0.8144, "step": 2280 }, { "epoch": 0.9090365647105709, "grad_norm": 0.19566833800627478, "learning_rate": 8.264225480061028e-07, "loss": 0.7771, "step": 2281 }, { "epoch": 0.9094350901663844, "grad_norm": 0.1975826569993677, "learning_rate": 8.192389187332539e-07, "loss": 0.7938, "step": 2282 }, { "epoch": 0.9098336156221979, "grad_norm": 0.19045983399236568, "learning_rate": 8.120859946712634e-07, "loss": 0.7845, "step": 2283 }, { "epoch": 0.9102321410780113, "grad_norm": 0.19130128975193195, "learning_rate": 8.049637872707672e-07, "loss": 0.7958, "step": 2284 }, { "epoch": 0.9106306665338249, "grad_norm": 0.19085596321752288, "learning_rate": 7.978723079332406e-07, "loss": 0.7612, "step": 2285 }, { "epoch": 0.9110291919896384, "grad_norm": 0.33424704454608156, "learning_rate": 7.908115680109629e-07, "loss": 0.7853, "step": 2286 }, { "epoch": 0.9114277174454518, "grad_norm": 0.1954738492496232, "learning_rate": 7.837815788070035e-07, "loss": 0.8041, "step": 2287 }, { "epoch": 0.9118262429012653, "grad_norm": 0.19475975240294963, "learning_rate": 7.767823515752116e-07, "loss": 0.7872, "step": 2288 }, { "epoch": 0.9122247683570788, "grad_norm": 0.18960360869197374, "learning_rate": 7.698138975201819e-07, "loss": 0.8041, "step": 2289 }, { "epoch": 0.9126232938128923, "grad_norm": 0.19589521054226136, "learning_rate": 7.628762277972534e-07, "loss": 0.7982, "step": 2290 }, { "epoch": 0.9130218192687057, "grad_norm": 0.1917631141189516, "learning_rate": 7.559693535124802e-07, "loss": 0.7938, "step": 2291 }, { "epoch": 0.9134203447245193, "grad_norm": 0.19253740493505767, "learning_rate": 7.490932857226219e-07, "loss": 0.7959, "step": 2292 }, { "epoch": 0.9138188701803328, "grad_norm": 0.19582622703851235, "learning_rate": 7.422480354351202e-07, "loss": 0.834, "step": 2293 }, { "epoch": 0.9142173956361462, "grad_norm": 0.18995947588533355, "learning_rate": 7.354336136080809e-07, "loss": 0.7762, "step": 2294 }, { "epoch": 0.9146159210919598, "grad_norm": 0.18806413991915635, "learning_rate": 7.286500311502686e-07, "loss": 0.797, "step": 2295 }, { "epoch": 0.9150144465477732, "grad_norm": 0.19277211114688542, "learning_rate": 7.218972989210616e-07, "loss": 0.7763, "step": 2296 }, { "epoch": 0.9154129720035867, "grad_norm": 0.19199075944716948, "learning_rate": 7.151754277304657e-07, "loss": 0.7568, "step": 2297 }, { "epoch": 0.9158114974594003, "grad_norm": 0.19072158788713017, "learning_rate": 7.084844283390823e-07, "loss": 0.7915, "step": 2298 }, { "epoch": 0.9162100229152137, "grad_norm": 0.19205282392375037, "learning_rate": 7.018243114580858e-07, "loss": 0.8034, "step": 2299 }, { "epoch": 0.9166085483710272, "grad_norm": 0.2052923264205816, "learning_rate": 6.951950877492209e-07, "loss": 0.7857, "step": 2300 }, { "epoch": 0.9170070738268407, "grad_norm": 0.22779157975014266, "learning_rate": 6.885967678247652e-07, "loss": 0.756, "step": 2301 }, { "epoch": 0.9174055992826542, "grad_norm": 0.18774142177297953, "learning_rate": 6.820293622475427e-07, "loss": 0.7857, "step": 2302 }, { "epoch": 0.9178041247384676, "grad_norm": 0.19498696530660528, "learning_rate": 6.754928815308703e-07, "loss": 0.7991, "step": 2303 }, { "epoch": 0.9182026501942812, "grad_norm": 0.19209043390951142, "learning_rate": 6.689873361385691e-07, "loss": 0.8101, "step": 2304 }, { "epoch": 0.9186011756500947, "grad_norm": 0.19290885228459345, "learning_rate": 6.625127364849371e-07, "loss": 0.7955, "step": 2305 }, { "epoch": 0.9189997011059081, "grad_norm": 0.1877743297868329, "learning_rate": 6.560690929347324e-07, "loss": 0.7844, "step": 2306 }, { "epoch": 0.9193982265617217, "grad_norm": 0.19214675198757558, "learning_rate": 6.49656415803157e-07, "loss": 0.7903, "step": 2307 }, { "epoch": 0.9197967520175351, "grad_norm": 0.19219057718417967, "learning_rate": 6.432747153558416e-07, "loss": 0.7761, "step": 2308 }, { "epoch": 0.9201952774733486, "grad_norm": 0.18838660622383804, "learning_rate": 6.369240018088297e-07, "loss": 0.7947, "step": 2309 }, { "epoch": 0.9205938029291622, "grad_norm": 0.1886108613905356, "learning_rate": 6.306042853285532e-07, "loss": 0.7813, "step": 2310 }, { "epoch": 0.9209923283849756, "grad_norm": 0.1925293191301323, "learning_rate": 6.243155760318332e-07, "loss": 0.7982, "step": 2311 }, { "epoch": 0.9213908538407891, "grad_norm": 0.19530492377194633, "learning_rate": 6.180578839858475e-07, "loss": 0.7885, "step": 2312 }, { "epoch": 0.9217893792966025, "grad_norm": 0.18651121519218392, "learning_rate": 6.118312192081166e-07, "loss": 0.7949, "step": 2313 }, { "epoch": 0.9221879047524161, "grad_norm": 0.19338094401034905, "learning_rate": 6.056355916665024e-07, "loss": 0.7717, "step": 2314 }, { "epoch": 0.9225864302082295, "grad_norm": 0.18423336706407692, "learning_rate": 5.994710112791713e-07, "loss": 0.7811, "step": 2315 }, { "epoch": 0.922984955664043, "grad_norm": 0.18939928604114048, "learning_rate": 5.933374879145893e-07, "loss": 0.7755, "step": 2316 }, { "epoch": 0.9233834811198566, "grad_norm": 0.1926905369480336, "learning_rate": 5.872350313915131e-07, "loss": 0.8114, "step": 2317 }, { "epoch": 0.92378200657567, "grad_norm": 0.19646582733405174, "learning_rate": 5.811636514789598e-07, "loss": 0.7871, "step": 2318 }, { "epoch": 0.9241805320314835, "grad_norm": 0.19298296374648816, "learning_rate": 5.75123357896199e-07, "loss": 0.8039, "step": 2319 }, { "epoch": 0.924579057487297, "grad_norm": 0.190458055205602, "learning_rate": 5.691141603127381e-07, "loss": 0.7835, "step": 2320 }, { "epoch": 0.9249775829431105, "grad_norm": 0.1915360703578091, "learning_rate": 5.631360683483001e-07, "loss": 0.8234, "step": 2321 }, { "epoch": 0.925376108398924, "grad_norm": 0.23747828168438873, "learning_rate": 5.571890915728206e-07, "loss": 0.79, "step": 2322 }, { "epoch": 0.9257746338547375, "grad_norm": 0.19153058747182247, "learning_rate": 5.512732395064224e-07, "loss": 0.7649, "step": 2323 }, { "epoch": 0.926173159310551, "grad_norm": 0.19727254538457217, "learning_rate": 5.453885216193988e-07, "loss": 0.8349, "step": 2324 }, { "epoch": 0.9265716847663644, "grad_norm": 0.1951429962580588, "learning_rate": 5.395349473322032e-07, "loss": 0.7978, "step": 2325 }, { "epoch": 0.926970210222178, "grad_norm": 0.18510338682179783, "learning_rate": 5.337125260154397e-07, "loss": 0.7777, "step": 2326 }, { "epoch": 0.9273687356779914, "grad_norm": 0.1946540136821385, "learning_rate": 5.279212669898326e-07, "loss": 0.8047, "step": 2327 }, { "epoch": 0.9277672611338049, "grad_norm": 0.18491969397571634, "learning_rate": 5.221611795262283e-07, "loss": 0.7573, "step": 2328 }, { "epoch": 0.9281657865896185, "grad_norm": 0.19372900242022098, "learning_rate": 5.164322728455684e-07, "loss": 0.8202, "step": 2329 }, { "epoch": 0.9285643120454319, "grad_norm": 0.19227025991711344, "learning_rate": 5.107345561188836e-07, "loss": 0.7805, "step": 2330 }, { "epoch": 0.9289628375012454, "grad_norm": 0.1884216178497241, "learning_rate": 5.050680384672668e-07, "loss": 0.7911, "step": 2331 }, { "epoch": 0.9293613629570588, "grad_norm": 0.19064898796693053, "learning_rate": 4.994327289618728e-07, "loss": 0.8286, "step": 2332 }, { "epoch": 0.9297598884128724, "grad_norm": 0.19168131739896943, "learning_rate": 4.938286366238942e-07, "loss": 0.7741, "step": 2333 }, { "epoch": 0.9301584138686858, "grad_norm": 0.19521237858027687, "learning_rate": 4.88255770424555e-07, "loss": 0.806, "step": 2334 }, { "epoch": 0.9305569393244993, "grad_norm": 0.19197627577848786, "learning_rate": 4.827141392850876e-07, "loss": 0.7898, "step": 2335 }, { "epoch": 0.9309554647803129, "grad_norm": 0.19415977793697126, "learning_rate": 4.772037520767181e-07, "loss": 0.7764, "step": 2336 }, { "epoch": 0.9313539902361263, "grad_norm": 0.18652836321806102, "learning_rate": 4.7172461762066356e-07, "loss": 0.8058, "step": 2337 }, { "epoch": 0.9317525156919398, "grad_norm": 0.1894930310367945, "learning_rate": 4.662767446881078e-07, "loss": 0.7747, "step": 2338 }, { "epoch": 0.9321510411477533, "grad_norm": 0.19105060903289703, "learning_rate": 4.6086014200018793e-07, "loss": 0.7969, "step": 2339 }, { "epoch": 0.9325495666035668, "grad_norm": 0.18939524754458784, "learning_rate": 4.5547481822799e-07, "loss": 0.775, "step": 2340 }, { "epoch": 0.9329480920593803, "grad_norm": 0.18595385386063937, "learning_rate": 4.5012078199251576e-07, "loss": 0.7898, "step": 2341 }, { "epoch": 0.9333466175151938, "grad_norm": 0.19033445395137963, "learning_rate": 4.4479804186469353e-07, "loss": 0.7734, "step": 2342 }, { "epoch": 0.9337451429710073, "grad_norm": 0.18559439244342524, "learning_rate": 4.3950660636534084e-07, "loss": 0.7788, "step": 2343 }, { "epoch": 0.9341436684268207, "grad_norm": 0.20079834918402295, "learning_rate": 4.342464839651661e-07, "loss": 0.8214, "step": 2344 }, { "epoch": 0.9345421938826343, "grad_norm": 0.1912887265583529, "learning_rate": 4.290176830847559e-07, "loss": 0.7846, "step": 2345 }, { "epoch": 0.9349407193384477, "grad_norm": 0.18960450357073091, "learning_rate": 4.238202120945478e-07, "loss": 0.7669, "step": 2346 }, { "epoch": 0.9353392447942612, "grad_norm": 0.19153974072758262, "learning_rate": 4.186540793148308e-07, "loss": 0.812, "step": 2347 }, { "epoch": 0.9357377702500748, "grad_norm": 0.18970776862717967, "learning_rate": 4.13519293015725e-07, "loss": 0.8019, "step": 2348 }, { "epoch": 0.9361362957058882, "grad_norm": 0.18839937884535282, "learning_rate": 4.084158614171685e-07, "loss": 0.7991, "step": 2349 }, { "epoch": 0.9365348211617017, "grad_norm": 0.19300125889679948, "learning_rate": 4.033437926889061e-07, "loss": 0.7821, "step": 2350 }, { "epoch": 0.9369333466175152, "grad_norm": 0.18898379977540675, "learning_rate": 3.983030949504829e-07, "loss": 0.7919, "step": 2351 }, { "epoch": 0.9373318720733287, "grad_norm": 0.19196900837664088, "learning_rate": 3.932937762712108e-07, "loss": 0.7896, "step": 2352 }, { "epoch": 0.9377303975291422, "grad_norm": 0.19925612955481922, "learning_rate": 3.883158446701796e-07, "loss": 0.8139, "step": 2353 }, { "epoch": 0.9381289229849556, "grad_norm": 0.18497982043923966, "learning_rate": 3.833693081162326e-07, "loss": 0.805, "step": 2354 }, { "epoch": 0.9385274484407692, "grad_norm": 0.19343018564847927, "learning_rate": 3.784541745279491e-07, "loss": 0.7965, "step": 2355 }, { "epoch": 0.9389259738965826, "grad_norm": 0.2122663845906148, "learning_rate": 3.735704517736438e-07, "loss": 0.7731, "step": 2356 }, { "epoch": 0.9393244993523961, "grad_norm": 0.19304292990391148, "learning_rate": 3.6871814767134305e-07, "loss": 0.7985, "step": 2357 }, { "epoch": 0.9397230248082096, "grad_norm": 0.18892205141585297, "learning_rate": 3.638972699887822e-07, "loss": 0.8119, "step": 2358 }, { "epoch": 0.9401215502640231, "grad_norm": 0.1913297127895265, "learning_rate": 3.5910782644338336e-07, "loss": 0.7902, "step": 2359 }, { "epoch": 0.9405200757198366, "grad_norm": 0.19033240742898452, "learning_rate": 3.543498247022492e-07, "loss": 0.7575, "step": 2360 }, { "epoch": 0.9409186011756501, "grad_norm": 0.18332766514040255, "learning_rate": 3.4962327238215134e-07, "loss": 0.7598, "step": 2361 }, { "epoch": 0.9413171266314636, "grad_norm": 0.18186761156092401, "learning_rate": 3.449281770495105e-07, "loss": 0.7943, "step": 2362 }, { "epoch": 0.941715652087277, "grad_norm": 0.1965958953954756, "learning_rate": 3.402645462204013e-07, "loss": 0.8086, "step": 2363 }, { "epoch": 0.9421141775430906, "grad_norm": 0.19135591876413088, "learning_rate": 3.3563238736051604e-07, "loss": 0.804, "step": 2364 }, { "epoch": 0.9425127029989041, "grad_norm": 0.18843239133027376, "learning_rate": 3.310317078851744e-07, "loss": 0.7751, "step": 2365 }, { "epoch": 0.9429112284547175, "grad_norm": 0.1944972137264629, "learning_rate": 3.2646251515929597e-07, "loss": 0.7862, "step": 2366 }, { "epoch": 0.9433097539105311, "grad_norm": 0.19037760409725837, "learning_rate": 3.2192481649740095e-07, "loss": 0.8166, "step": 2367 }, { "epoch": 0.9437082793663445, "grad_norm": 0.1871623371191181, "learning_rate": 3.1741861916359193e-07, "loss": 0.7655, "step": 2368 }, { "epoch": 0.944106804822158, "grad_norm": 0.18764789979300736, "learning_rate": 3.129439303715387e-07, "loss": 0.7942, "step": 2369 }, { "epoch": 0.9445053302779715, "grad_norm": 0.18934527512136454, "learning_rate": 3.0850075728448e-07, "loss": 0.8114, "step": 2370 }, { "epoch": 0.944903855733785, "grad_norm": 0.18732805849733797, "learning_rate": 3.0408910701519303e-07, "loss": 0.783, "step": 2371 }, { "epoch": 0.9453023811895985, "grad_norm": 0.20572141307841002, "learning_rate": 2.997089866259972e-07, "loss": 0.8062, "step": 2372 }, { "epoch": 0.945700906645412, "grad_norm": 0.19247170516758175, "learning_rate": 2.953604031287349e-07, "loss": 0.8098, "step": 2373 }, { "epoch": 0.9460994321012255, "grad_norm": 0.18598311478233595, "learning_rate": 2.910433634847709e-07, "loss": 0.7549, "step": 2374 }, { "epoch": 0.9464979575570389, "grad_norm": 0.18855187031366835, "learning_rate": 2.8675787460496816e-07, "loss": 0.7688, "step": 2375 }, { "epoch": 0.9468964830128525, "grad_norm": 0.18656543479412127, "learning_rate": 2.8250394334967903e-07, "loss": 0.7844, "step": 2376 }, { "epoch": 0.947295008468666, "grad_norm": 0.1987614281014416, "learning_rate": 2.7828157652874054e-07, "loss": 0.7873, "step": 2377 }, { "epoch": 0.9476935339244794, "grad_norm": 0.18946005010606964, "learning_rate": 2.7409078090146144e-07, "loss": 0.7919, "step": 2378 }, { "epoch": 0.948092059380293, "grad_norm": 0.18729259602203205, "learning_rate": 2.699315631766064e-07, "loss": 0.7906, "step": 2379 }, { "epoch": 0.9484905848361064, "grad_norm": 0.21584635145950695, "learning_rate": 2.6580393001239604e-07, "loss": 0.7525, "step": 2380 }, { "epoch": 0.9488891102919199, "grad_norm": 0.19301764265768684, "learning_rate": 2.617078880164825e-07, "loss": 0.796, "step": 2381 }, { "epoch": 0.9492876357477333, "grad_norm": 0.18867210144130342, "learning_rate": 2.5764344374595187e-07, "loss": 0.8082, "step": 2382 }, { "epoch": 0.9496861612035469, "grad_norm": 0.18458535962402378, "learning_rate": 2.5361060370729715e-07, "loss": 0.7828, "step": 2383 }, { "epoch": 0.9500846866593604, "grad_norm": 0.18940087074242587, "learning_rate": 2.496093743564321e-07, "loss": 0.7912, "step": 2384 }, { "epoch": 0.9504832121151738, "grad_norm": 0.1967469512602545, "learning_rate": 2.4563976209865504e-07, "loss": 0.795, "step": 2385 }, { "epoch": 0.9508817375709874, "grad_norm": 0.18106777816661615, "learning_rate": 2.417017732886562e-07, "loss": 0.7606, "step": 2386 }, { "epoch": 0.9512802630268008, "grad_norm": 0.19029595392071927, "learning_rate": 2.377954142305039e-07, "loss": 0.7953, "step": 2387 }, { "epoch": 0.9516787884826143, "grad_norm": 0.1920867643492066, "learning_rate": 2.3392069117762706e-07, "loss": 0.7959, "step": 2388 }, { "epoch": 0.9520773139384279, "grad_norm": 0.18771665475461194, "learning_rate": 2.300776103328173e-07, "loss": 0.7736, "step": 2389 }, { "epoch": 0.9524758393942413, "grad_norm": 0.18980640789999415, "learning_rate": 2.2626617784820225e-07, "loss": 0.7606, "step": 2390 }, { "epoch": 0.9528743648500548, "grad_norm": 0.18734395867405335, "learning_rate": 2.2248639982525688e-07, "loss": 0.7989, "step": 2391 }, { "epoch": 0.9532728903058683, "grad_norm": 0.2160018240089795, "learning_rate": 2.1873828231477433e-07, "loss": 0.7957, "step": 2392 }, { "epoch": 0.9536714157616818, "grad_norm": 0.1868607933405286, "learning_rate": 2.150218313168706e-07, "loss": 0.8183, "step": 2393 }, { "epoch": 0.9540699412174952, "grad_norm": 0.18834121472126297, "learning_rate": 2.113370527809644e-07, "loss": 0.7748, "step": 2394 }, { "epoch": 0.9544684666733088, "grad_norm": 0.1866371434060772, "learning_rate": 2.07683952605775e-07, "loss": 0.7682, "step": 2395 }, { "epoch": 0.9548669921291223, "grad_norm": 0.19477979464068296, "learning_rate": 2.0406253663930675e-07, "loss": 0.7962, "step": 2396 }, { "epoch": 0.9552655175849357, "grad_norm": 0.18710243595192597, "learning_rate": 2.0047281067884672e-07, "loss": 0.7971, "step": 2397 }, { "epoch": 0.9556640430407493, "grad_norm": 0.18958183027461045, "learning_rate": 1.9691478047094924e-07, "loss": 0.7851, "step": 2398 }, { "epoch": 0.9560625684965627, "grad_norm": 0.18583596188293008, "learning_rate": 1.9338845171142928e-07, "loss": 0.7729, "step": 2399 }, { "epoch": 0.9564610939523762, "grad_norm": 0.1846233484812463, "learning_rate": 1.8989383004535121e-07, "loss": 0.7797, "step": 2400 }, { "epoch": 0.9568596194081896, "grad_norm": 0.18844365183443165, "learning_rate": 1.86430921067029e-07, "loss": 0.7869, "step": 2401 }, { "epoch": 0.9572581448640032, "grad_norm": 0.1928283675611628, "learning_rate": 1.8299973031999707e-07, "loss": 0.8196, "step": 2402 }, { "epoch": 0.9576566703198167, "grad_norm": 0.1871302583547213, "learning_rate": 1.7960026329702618e-07, "loss": 0.7688, "step": 2403 }, { "epoch": 0.9580551957756301, "grad_norm": 0.1819281786572131, "learning_rate": 1.762325254400965e-07, "loss": 0.7745, "step": 2404 }, { "epoch": 0.9584537212314437, "grad_norm": 0.18580491353142833, "learning_rate": 1.7289652214039775e-07, "loss": 0.7688, "step": 2405 }, { "epoch": 0.9588522466872571, "grad_norm": 0.18495610694488546, "learning_rate": 1.6959225873831586e-07, "loss": 0.7863, "step": 2406 }, { "epoch": 0.9592507721430706, "grad_norm": 0.18788375628377132, "learning_rate": 1.6631974052342846e-07, "loss": 0.7826, "step": 2407 }, { "epoch": 0.9596492975988842, "grad_norm": 0.18917657214611222, "learning_rate": 1.6307897273449168e-07, "loss": 0.7734, "step": 2408 }, { "epoch": 0.9600478230546976, "grad_norm": 0.18759506045359045, "learning_rate": 1.5986996055943781e-07, "loss": 0.7992, "step": 2409 }, { "epoch": 0.9604463485105111, "grad_norm": 0.1916191115268579, "learning_rate": 1.5669270913536427e-07, "loss": 0.8289, "step": 2410 }, { "epoch": 0.9608448739663246, "grad_norm": 0.18451542468901574, "learning_rate": 1.535472235485158e-07, "loss": 0.7726, "step": 2411 }, { "epoch": 0.9612433994221381, "grad_norm": 0.18676157641440086, "learning_rate": 1.5043350883429786e-07, "loss": 0.7922, "step": 2412 }, { "epoch": 0.9616419248779515, "grad_norm": 0.1872437071497714, "learning_rate": 1.4735156997724765e-07, "loss": 0.7802, "step": 2413 }, { "epoch": 0.9620404503337651, "grad_norm": 0.18907840330520773, "learning_rate": 1.4430141191103865e-07, "loss": 0.7903, "step": 2414 }, { "epoch": 0.9624389757895786, "grad_norm": 0.18712650053474555, "learning_rate": 1.41283039518465e-07, "loss": 0.7993, "step": 2415 }, { "epoch": 0.962837501245392, "grad_norm": 0.19060675078211464, "learning_rate": 1.3829645763144162e-07, "loss": 0.7952, "step": 2416 }, { "epoch": 0.9632360267012056, "grad_norm": 0.19012885112510405, "learning_rate": 1.353416710309885e-07, "loss": 0.7988, "step": 2417 }, { "epoch": 0.963634552157019, "grad_norm": 0.18668957461300054, "learning_rate": 1.324186844472264e-07, "loss": 0.7676, "step": 2418 }, { "epoch": 0.9640330776128325, "grad_norm": 0.18246864478928232, "learning_rate": 1.295275025593745e-07, "loss": 0.7837, "step": 2419 }, { "epoch": 0.9644316030686461, "grad_norm": 0.18700001914029454, "learning_rate": 1.2666812999573064e-07, "loss": 0.7841, "step": 2420 }, { "epoch": 0.9648301285244595, "grad_norm": 0.18732989426938132, "learning_rate": 1.2384057133367988e-07, "loss": 0.7682, "step": 2421 }, { "epoch": 0.965228653980273, "grad_norm": 0.18764475457309285, "learning_rate": 1.2104483109967035e-07, "loss": 0.7989, "step": 2422 }, { "epoch": 0.9656271794360864, "grad_norm": 0.18205691271384167, "learning_rate": 1.1828091376921758e-07, "loss": 0.761, "step": 2423 }, { "epoch": 0.9660257048919, "grad_norm": 0.18817192206412373, "learning_rate": 1.1554882376689557e-07, "loss": 0.795, "step": 2424 }, { "epoch": 0.9664242303477134, "grad_norm": 0.18225533752041095, "learning_rate": 1.1284856546632583e-07, "loss": 0.7544, "step": 2425 }, { "epoch": 0.966822755803527, "grad_norm": 0.1888654968857259, "learning_rate": 1.1018014319017056e-07, "loss": 0.7938, "step": 2426 }, { "epoch": 0.9672212812593405, "grad_norm": 0.18791322397897098, "learning_rate": 1.0754356121013276e-07, "loss": 0.8, "step": 2427 }, { "epoch": 0.9676198067151539, "grad_norm": 0.18588223430788398, "learning_rate": 1.0493882374694287e-07, "loss": 0.7909, "step": 2428 }, { "epoch": 0.9680183321709674, "grad_norm": 0.18541611026382643, "learning_rate": 1.0236593497035208e-07, "loss": 0.7986, "step": 2429 }, { "epoch": 0.9684168576267809, "grad_norm": 0.18672094333830974, "learning_rate": 9.982489899912573e-08, "loss": 0.7854, "step": 2430 }, { "epoch": 0.9688153830825944, "grad_norm": 0.1943543110096466, "learning_rate": 9.731571990104105e-08, "loss": 0.798, "step": 2431 }, { "epoch": 0.9692139085384079, "grad_norm": 0.19085690756684667, "learning_rate": 9.483840169287828e-08, "loss": 0.7845, "step": 2432 }, { "epoch": 0.9696124339942214, "grad_norm": 0.22770637291835538, "learning_rate": 9.239294834041179e-08, "loss": 0.8013, "step": 2433 }, { "epoch": 0.9700109594500349, "grad_norm": 0.21119203670687375, "learning_rate": 8.997936375840566e-08, "loss": 0.8095, "step": 2434 }, { "epoch": 0.9704094849058483, "grad_norm": 0.18505204962281271, "learning_rate": 8.759765181060698e-08, "loss": 0.7804, "step": 2435 }, { "epoch": 0.9708080103616619, "grad_norm": 0.18558037459333185, "learning_rate": 8.524781630974144e-08, "loss": 0.7941, "step": 2436 }, { "epoch": 0.9712065358174753, "grad_norm": 0.18719202865767845, "learning_rate": 8.292986101750222e-08, "loss": 0.8026, "step": 2437 }, { "epoch": 0.9716050612732888, "grad_norm": 0.18361106837972332, "learning_rate": 8.064378964455666e-08, "loss": 0.7835, "step": 2438 }, { "epoch": 0.9720035867291024, "grad_norm": 0.1941218667296314, "learning_rate": 7.838960585051959e-08, "loss": 0.7761, "step": 2439 }, { "epoch": 0.9724021121849158, "grad_norm": 0.1875724989599244, "learning_rate": 7.616731324396887e-08, "loss": 0.7837, "step": 2440 }, { "epoch": 0.9728006376407293, "grad_norm": 0.19069373972701836, "learning_rate": 7.397691538242103e-08, "loss": 0.8045, "step": 2441 }, { "epoch": 0.9731991630965428, "grad_norm": 0.18927346704482184, "learning_rate": 7.181841577234449e-08, "loss": 0.8012, "step": 2442 }, { "epoch": 0.9735976885523563, "grad_norm": 0.18820692363643488, "learning_rate": 6.969181786913304e-08, "loss": 0.7829, "step": 2443 }, { "epoch": 0.9739962140081698, "grad_norm": 0.18304987062051034, "learning_rate": 6.759712507711902e-08, "loss": 0.7697, "step": 2444 }, { "epoch": 0.9743947394639833, "grad_norm": 0.19039574921948385, "learning_rate": 6.553434074955789e-08, "loss": 0.7909, "step": 2445 }, { "epoch": 0.9747932649197968, "grad_norm": 0.19093438505230914, "learning_rate": 6.350346818862374e-08, "loss": 0.8287, "step": 2446 }, { "epoch": 0.9751917903756102, "grad_norm": 0.18824893139518173, "learning_rate": 6.150451064540708e-08, "loss": 0.7963, "step": 2447 }, { "epoch": 0.9755903158314237, "grad_norm": 0.18807173681160894, "learning_rate": 5.953747131990595e-08, "loss": 0.7839, "step": 2448 }, { "epoch": 0.9759888412872372, "grad_norm": 0.1833502517025838, "learning_rate": 5.760235336102149e-08, "loss": 0.7594, "step": 2449 }, { "epoch": 0.9763873667430507, "grad_norm": 0.3654620173098497, "learning_rate": 5.569915986656016e-08, "loss": 0.7682, "step": 2450 }, { "epoch": 0.9767858921988642, "grad_norm": 0.18924038600883106, "learning_rate": 5.3827893883215964e-08, "loss": 0.7996, "step": 2451 }, { "epoch": 0.9771844176546777, "grad_norm": 0.1903600412956394, "learning_rate": 5.198855840657491e-08, "loss": 0.8085, "step": 2452 }, { "epoch": 0.9775829431104912, "grad_norm": 0.1869548438274652, "learning_rate": 5.01811563811061e-08, "loss": 0.8068, "step": 2453 }, { "epoch": 0.9779814685663046, "grad_norm": 0.18864866332212885, "learning_rate": 4.8405690700161766e-08, "loss": 0.7886, "step": 2454 }, { "epoch": 0.9783799940221182, "grad_norm": 0.18929120724640708, "learning_rate": 4.6662164205966143e-08, "loss": 0.7762, "step": 2455 }, { "epoch": 0.9787785194779317, "grad_norm": 0.1917795489994272, "learning_rate": 4.495057968961769e-08, "loss": 0.7884, "step": 2456 }, { "epoch": 0.9791770449337451, "grad_norm": 0.39039385650289893, "learning_rate": 4.327093989107578e-08, "loss": 0.7692, "step": 2457 }, { "epoch": 0.9795755703895587, "grad_norm": 0.18426417807456694, "learning_rate": 4.162324749916735e-08, "loss": 0.7869, "step": 2458 }, { "epoch": 0.9799740958453721, "grad_norm": 0.18680439549269473, "learning_rate": 4.0007505151571365e-08, "loss": 0.751, "step": 2459 }, { "epoch": 0.9803726213011856, "grad_norm": 0.1818392556169463, "learning_rate": 3.8423715434823264e-08, "loss": 0.7696, "step": 2460 }, { "epoch": 0.9807711467569991, "grad_norm": 0.18570844625506863, "learning_rate": 3.6871880884310486e-08, "loss": 0.7886, "step": 2461 }, { "epoch": 0.9811696722128126, "grad_norm": 0.21101630534654273, "learning_rate": 3.5352003984259195e-08, "loss": 0.7831, "step": 2462 }, { "epoch": 0.9815681976686261, "grad_norm": 0.18069209360220204, "learning_rate": 3.3864087167738705e-08, "loss": 0.7504, "step": 2463 }, { "epoch": 0.9819667231244396, "grad_norm": 0.1966709155844413, "learning_rate": 3.240813281666144e-08, "loss": 0.8465, "step": 2464 }, { "epoch": 0.9823652485802531, "grad_norm": 0.1932914272847854, "learning_rate": 3.09841432617608e-08, "loss": 0.8142, "step": 2465 }, { "epoch": 0.9827637740360665, "grad_norm": 0.18925513736084928, "learning_rate": 2.959212078261553e-08, "loss": 0.7721, "step": 2466 }, { "epoch": 0.98316229949188, "grad_norm": 0.19004539946693746, "learning_rate": 2.823206760761643e-08, "loss": 0.8169, "step": 2467 }, { "epoch": 0.9835608249476935, "grad_norm": 0.1836565966256934, "learning_rate": 2.690398591398413e-08, "loss": 0.7499, "step": 2468 }, { "epoch": 0.983959350403507, "grad_norm": 0.18503029280898622, "learning_rate": 2.5607877827757975e-08, "loss": 0.8015, "step": 2469 }, { "epoch": 0.9843578758593206, "grad_norm": 0.20802778839564787, "learning_rate": 2.4343745423791588e-08, "loss": 0.8196, "step": 2470 }, { "epoch": 0.984756401315134, "grad_norm": 0.18807720140750997, "learning_rate": 2.3111590725750644e-08, "loss": 0.7935, "step": 2471 }, { "epoch": 0.9851549267709475, "grad_norm": 0.18198059255273902, "learning_rate": 2.191141570610844e-08, "loss": 0.784, "step": 2472 }, { "epoch": 0.9855534522267609, "grad_norm": 0.18391481474432778, "learning_rate": 2.074322228614589e-08, "loss": 0.7844, "step": 2473 }, { "epoch": 0.9859519776825745, "grad_norm": 0.18825854264243994, "learning_rate": 1.9607012335949306e-08, "loss": 0.7916, "step": 2474 }, { "epoch": 0.986350503138388, "grad_norm": 0.18782508477662568, "learning_rate": 1.850278767439928e-08, "loss": 0.7595, "step": 2475 }, { "epoch": 0.9867490285942014, "grad_norm": 0.18321843468534266, "learning_rate": 1.7430550069175157e-08, "loss": 0.7797, "step": 2476 }, { "epoch": 0.987147554050015, "grad_norm": 0.18648381218274565, "learning_rate": 1.6390301236755003e-08, "loss": 0.7851, "step": 2477 }, { "epoch": 0.9875460795058284, "grad_norm": 0.18500156564930573, "learning_rate": 1.53820428424023e-08, "loss": 0.7918, "step": 2478 }, { "epoch": 0.9879446049616419, "grad_norm": 0.18650103609208593, "learning_rate": 1.4405776500170388e-08, "loss": 0.7453, "step": 2479 }, { "epoch": 0.9883431304174554, "grad_norm": 0.18474307243866062, "learning_rate": 1.346150377290023e-08, "loss": 0.812, "step": 2480 }, { "epoch": 0.9887416558732689, "grad_norm": 0.18500951818710418, "learning_rate": 1.2549226172213769e-08, "loss": 0.7823, "step": 2481 }, { "epoch": 0.9891401813290824, "grad_norm": 0.18520599480715474, "learning_rate": 1.1668945158518352e-08, "loss": 0.8023, "step": 2482 }, { "epoch": 0.9895387067848959, "grad_norm": 0.18653565333341804, "learning_rate": 1.0820662140997862e-08, "loss": 0.803, "step": 2483 }, { "epoch": 0.9899372322407094, "grad_norm": 0.18774709338075257, "learning_rate": 1.0004378477610489e-08, "loss": 0.82, "step": 2484 }, { "epoch": 0.9903357576965228, "grad_norm": 0.18995043820529847, "learning_rate": 9.220095475090951e-09, "loss": 0.795, "step": 2485 }, { "epoch": 0.9907342831523364, "grad_norm": 0.20276705829265076, "learning_rate": 8.467814388948282e-09, "loss": 0.7578, "step": 2486 }, { "epoch": 0.9911328086081499, "grad_norm": 0.18309866901765423, "learning_rate": 7.747536423456937e-09, "loss": 0.7786, "step": 2487 }, { "epoch": 0.9915313340639633, "grad_norm": 0.18809540486983145, "learning_rate": 7.059262731661243e-09, "loss": 0.8164, "step": 2488 }, { "epoch": 0.9919298595197769, "grad_norm": 0.18502562806451164, "learning_rate": 6.402994415377617e-09, "loss": 0.7805, "step": 2489 }, { "epoch": 0.9923283849755903, "grad_norm": 0.1859120952234941, "learning_rate": 5.7787325251768e-09, "loss": 0.7834, "step": 2490 }, { "epoch": 0.9927269104314038, "grad_norm": 0.1846277966595508, "learning_rate": 5.186478060403844e-09, "loss": 0.7745, "step": 2491 }, { "epoch": 0.9931254358872172, "grad_norm": 0.18693650346096127, "learning_rate": 4.626231969155903e-09, "loss": 0.7965, "step": 2492 }, { "epoch": 0.9935239613430308, "grad_norm": 0.18817760919614965, "learning_rate": 4.0979951482955636e-09, "loss": 0.817, "step": 2493 }, { "epoch": 0.9939224867988443, "grad_norm": 0.18262042873834777, "learning_rate": 3.6017684434397348e-09, "loss": 0.7637, "step": 2494 }, { "epoch": 0.9943210122546577, "grad_norm": 0.18654490339612353, "learning_rate": 3.1375526489685337e-09, "loss": 0.7688, "step": 2495 }, { "epoch": 0.9947195377104713, "grad_norm": 0.18607152410344097, "learning_rate": 2.7053485080141827e-09, "loss": 0.7929, "step": 2496 }, { "epoch": 0.9951180631662847, "grad_norm": 0.18419897844519703, "learning_rate": 2.3051567124587894e-09, "loss": 0.7701, "step": 2497 }, { "epoch": 0.9955165886220982, "grad_norm": 0.19186493723661452, "learning_rate": 1.936977902949888e-09, "loss": 0.808, "step": 2498 }, { "epoch": 0.9959151140779118, "grad_norm": 0.19815019961308503, "learning_rate": 1.6008126688737968e-09, "loss": 0.7752, "step": 2499 }, { "epoch": 0.9963136395337252, "grad_norm": 0.18417877206027317, "learning_rate": 1.2966615483800404e-09, "loss": 0.7644, "step": 2500 }, { "epoch": 0.9967121649895387, "grad_norm": 0.18546354005657695, "learning_rate": 1.0245250283613672e-09, "loss": 0.7876, "step": 2501 }, { "epoch": 0.9971106904453522, "grad_norm": 0.1876008897484704, "learning_rate": 7.844035444648512e-10, "loss": 0.8233, "step": 2502 }, { "epoch": 0.9975092159011657, "grad_norm": 0.19981491316640151, "learning_rate": 5.762974810852307e-10, "loss": 0.7684, "step": 2503 }, { "epoch": 0.9979077413569791, "grad_norm": 0.18665647757266646, "learning_rate": 4.002071713626876e-10, "loss": 0.7954, "step": 2504 }, { "epoch": 0.9983062668127927, "grad_norm": 0.1894225254309972, "learning_rate": 2.5613289719172985e-10, "loss": 0.788, "step": 2505 }, { "epoch": 0.9987047922686062, "grad_norm": 0.187487701192293, "learning_rate": 1.440748892100885e-10, "loss": 0.8017, "step": 2506 }, { "epoch": 0.9991033177244196, "grad_norm": 0.1916417262477468, "learning_rate": 6.403332680537943e-11, "loss": 0.803, "step": 2507 }, { "epoch": 0.9995018431802332, "grad_norm": 0.18369932510494486, "learning_rate": 1.6008338108441936e-11, "loss": 0.7746, "step": 2508 }, { "epoch": 0.9999003686360466, "grad_norm": 0.19092333059410313, "learning_rate": 0.0, "loss": 0.7752, "step": 2509 }, { "epoch": 0.9999003686360466, "step": 2509, "total_flos": 2400250660651008.0, "train_loss": 0.8378047743086893, "train_runtime": 25259.4249, "train_samples_per_second": 57.218, "train_steps_per_second": 0.099 } ], "logging_steps": 1, "max_steps": 2509, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2400250660651008.0, "train_batch_size": 9, "trial_name": null, "trial_params": null }